/**
 * Continue the estimation for a fixed number of extra iterations.
 *
 * Calls Probapost once on the current state, then performs `nbiter` rounds of
 * Mstep followed by Probapost, and finally calls Likelihood on
 * dat.Get_eff_datafile().  A non-positive `nbiter` skips the rounds entirely.
 *
 * @param nbiter number of Mstep/Probapost rounds to run
 * @param dat    data set (received by value, as in the declaration)
 * @param mod    model description (received by value, as in the declaration)
 */
void parameters::Estimation_continue(int nbiter, datafile dat, model mod)
{
    Probapost(mod, dat.Get_mat_datafile());

    int remaining = nbiter;
    while (remaining > 0) {
        Mstep(dat, mod);
        Probapost(mod, dat.Get_mat_datafile());
        --remaining;
    }

    Likelihood(dat.Get_eff_datafile());
}
/**
 * Perform one short EM run starting from a random initialisation.
 *
 * After initRANDOM(1) and a first Estep/Mstep pair, further Estep/Mstep pairs
 * are executed (always at least one) until the stopping rule selected by
 * clusteringStrategyInit->getStopName() says to stop:
 *   - NBITERATION         : stop once the iteration count reaches getNbIteration();
 *   - EPSILON             : stop once the log-likelihood moves by at most
 *                           getEpsilon(), or maxNbIterationInInit is reached;
 *   - NBITERATION_EPSILON : stop on whichever of the two limits is hit first.
 *
 * @param clusteringStrategyInit provides the stopping rule and its thresholds
 * @param logLikelihood          output: log-likelihood after the last iteration
 * @throws internalMixmodError   if the stopping rule is none of the above
 */
void XEMModel::oneRunOfSmallEM(XEMClusteringStrategyInit * clusteringStrategyInit, double & logLikelihood)
{
    initRANDOM(1);
    Estep();
    Mstep();
    logLikelihood = getLogLikelihood(true);   // true: compute fik as well

    int64_t iterationCount = 1;
    bool keepGoing = true;

    do {
        const double previousLogLikelihood = logLikelihood;

        Estep();
        Mstep();
        ++iterationCount;

        // Decide whether another iteration is needed.
        switch (clusteringStrategyInit->getStopName()) {
        case NBITERATION:
            keepGoing = (iterationCount < clusteringStrategyInit->getNbIteration());
            break;

        case EPSILON: {
            logLikelihood = getLogLikelihood(true);   // true: compute fik as well
            const double delta = fabs(logLikelihood - previousLogLikelihood);
            // The iteration count is capped as well, so that the run cannot
            // go on for too long.
            keepGoing = (delta > clusteringStrategyInit->getEpsilon())
                        && (iterationCount < maxNbIterationInInit);
            break;
        }

        case NBITERATION_EPSILON: {
            logLikelihood = getLogLikelihood(true);   // true: compute fik as well
            const double delta = fabs(logLikelihood - previousLogLikelihood);
            keepGoing = (delta > clusteringStrategyInit->getEpsilon())
                        && (iterationCount < clusteringStrategyInit->getNbIteration());
            break;
        }

        default:
            throw internalMixmodError;
        }
    } while (keepGoing);

    // With the pure iteration-count rule the likelihood was not refreshed
    // inside the loop; compute it now because it is an output.
    if (clusteringStrategyInit->getStopName() == NBITERATION) {
        logLikelihood = getLogLikelihood(true);   // true: compute fik as well
    }
}
void XEM::OneEM(){ double loglike = ComputeLogLike(), prec = log(0); int it=0; while ( (it<iterCurrent) && ((loglike-prec)>p_results->p_strategy->m_tolKeep) ){ it ++; Estep(); Mstep(); prec = loglike; loglike = ComputeLogLike(); } // Une verif if (prec>(loglike+p_results->p_strategy->m_tolKeep)) cout << "pb EM " << "prec" << prec << " loglike " << loglike << endl;; }
/**
 * SEM_MAX initialisation: try several one-step SEM runs from random starts and
 * keep the parameter of the run with the highest log-likelihood.
 *
 * Each of the strategyInit->getNbIteration() attempts resets the parameter,
 * calls initRANDOM(1), then Estep, Sstep and Mstep.  An attempt that throws an
 * XEMErrorType is discarded and does not count as a successful run.
 *
 * On return, and also when SEM_MAX_error is thrown, _parameter is the object
 * obtained from _parameter->clone() at entry, updated with the best run found
 * (unchanged from the entry state if no run succeeded).  Installing it before
 * the failure check keeps the clone from leaking on the error path and matches
 * what initCEM_INIT does.
 *
 * @param strategyInit supplies the number of attempts through getNbIteration()
 * @throws SEM_MAX_error if every attempt failed
 */
void XEMModel::initSEM_MAX(XEMStrategyInit * strategyInit)
{
    _algoName = SEM;

    XEMParameter * bestParameter = _parameter->clone();
    double bestLogLikelihood = 0.0;
    int64_t nbRunOfSEMMAXOk = 0;

    for (int64_t j = 0; j < strategyInit->getNbIteration(); j++) {
        // Counted optimistically; undone in the handler if the run fails.
        nbRunOfSEMMAXOk++;
        try {
            _parameter->reset();
            initRANDOM(1);
            Estep();
            Sstep();
            Mstep();

            const double logLikelihood = getLogLikelihood(true);   // true: compute fik as well

            // The first successful run is always taken as the current best.
            if ((nbRunOfSEMMAXOk == 1) || (logLikelihood > bestLogLikelihood)) {
                bestLogLikelihood = logLikelihood;
                bestParameter->recopy(_parameter);
            }
        }
        catch (XEMErrorType) {
            nbRunOfSEMMAXOk--;
        }
    }

    // Hand bestParameter over to the model on both the success and the
    // failure path, so that it is owned by the model before any throw.
    delete _parameter;
    _parameter = bestParameter;
    _parameter->setModel(this);

    if (nbRunOfSEMMAXOk == 0) {
        throw SEM_MAX_error;
    }
}
BNTree* train_bntree (char *treestr, char *modelstr, char *ss_filename, char *model_name, char *output_filename) { FILE *ss_fp, *out_fp; int model_type; double *background_probs; char **tuple_arr; double *count_arr; int num_tuples = 0; double ll; double last_ll; int iteration; int converged; char tag_str[STRLEN]; char val_str[STRLEN]; int u; int i, j, k; Node *node; int param_num; double row_sum; double param_val; CellListItem *cli; BNTree *bntree = NewBNTree(0); /* no cache for training (for now) */ strcpy (bntree->name, model_name); if (strcmp (modelstr, "TT0") == 0) { model_type = TT0; bntree->order = 0; bntree->num_states = 6; } else if (strcmp (modelstr, "TT1") == 0) { model_type = TT1; bntree->order = 1; bntree->num_states = 36; } else if (strcmp (modelstr, "TT2") == 0) { model_type = TT2; bntree->order = 2; bntree->num_states = 216; } else if (strcmp (modelstr, "R0") == 0) { model_type = R0; bntree->order = 0; bntree->num_states = 6; bntree->num_params = 18; } else if (strcmp (modelstr, "R1") == 0) { model_type = R1; bntree->order = 1; bntree->num_states = 36; bntree->num_params = 236; } else if (strcmp (modelstr, "R2") == 0) { model_type = R2; bntree->order = 2; bntree->num_states = 216; } else if (strcmp (modelstr, "G0") == 0) { model_type = G0; bntree->order = 0; bntree->num_states = 6; bntree->num_params = 36; } else if (strcmp (modelstr, "G1") == 0) { model_type = G1; bntree->order = 1; bntree->num_states = 36; bntree->num_params = 1296; } else if (strcmp (modelstr, "G2") == 0) { model_type = G2; bntree->order = 2; bntree->num_states = 216; bntree->num_params = 46656; } else { fprintf (stderr, "Unknown probability model type: %s\n", modelstr); exit(-1); } /* initialize tree structure */ parse_tree_string (bntree, treestr); /* parse the sufficient statistics file */ ss_fp = fopen (ss_filename, "r"); while (fscanf (ss_fp, "%s = %s\n", tag_str, val_str) == 2) { if (strcmp (tag_str, "NTUPLES") == 0) num_tuples = atoi(val_str); } fclose (ss_fp); tuple_arr = 
(char **) malloc (num_tuples * sizeof (char *)); for (i=0; i<num_tuples; i++) tuple_arr[i] = (char *) malloc (STRLEN * sizeof (char)); count_arr = (double *) malloc (num_tuples * sizeof (double)); background_probs = (double *) malloc (bntree->num_states * sizeof (double)); parse_ss (bntree, ss_filename, tuple_arr, count_arr, background_probs); /* initialize parameters */ bntree->param_map = (int **)malloc(bntree->num_states * sizeof(int *)); for (i=0; i<bntree->num_states; i++) bntree->param_map[i] = (int *)malloc(bntree->num_states * sizeof(int)); bntree->weight_idx = (int **)malloc(bntree->num_states * sizeof(int *)); for (i=0; i<bntree->num_states; i++) bntree->weight_idx[i] = (int *)malloc(bntree->num_states * sizeof(int)); if (model_type == R0) init_params_R0 (bntree); else if (model_type == G0) init_params_G0 (bntree); else if (model_type == R1) init_params_R1 (bntree); else if (model_type == G1) init_params_G1 (bntree); else if (model_type == G2) init_params_G2 (bntree); else { fprintf (stderr, "Model type %s not yet supported\n", modelstr); exit (-1); } /* create inverse parameter map */ bntree->inv_param_map = (CellListItem **)malloc(bntree->num_params * sizeof (CellListItem *)); for (i=0; i<bntree->num_params; i++) bntree->inv_param_map[i] = NewCellList(); for (i=0; i<bntree->num_states; i++) { for (j=0; j<bntree->num_states; j++) { param_num = bntree->param_map[i][j]; cell_list_append(bntree->inv_param_map[param_num], i, j); } } /* allocate local CPDs and expectations */ for (u=0; u<bntree->num_nodes; u++) { node = bntree->preorder[u]; if (node->parent == NULL) continue; node->E = (double **)malloc(bntree->num_states * sizeof(double *)); for (i=0; i<bntree->num_states; i++) node->E[i] = (double *)malloc(bntree->num_states * sizeof(double)); node->P = (float **)malloc(bntree->num_states * sizeof(float *)); for (i=0; i<bntree->num_states; i++) node->P[i] = (float *)malloc(bntree->num_states * sizeof(float)); node->W = (double **)malloc(bntree->num_states * 
sizeof(double *)); for (i=0; i<bntree->num_states; i++) node->W[i] = (double *)malloc(bntree->num_states * sizeof(double)); } /* randomly initialize the parameters in a way that guarantees all probabilities will be within 0 and 1, and all rows sum to 1 */ /* also, self substitution probs will be at least 0.5 */ for (u=0; u<bntree->num_nodes; u++) { node = bntree->preorder[u]; if (node->parent == NULL) continue; srand(time(NULL)); for (k=0; k<bntree->num_params - bntree->num_states; k++) { param_val = bounded_random (0.05, 0.50) / (bntree->num_states - 1); for (cli = bntree->inv_param_map[k]; cli != NULL; cli = cli->next) { i = cli->row; j = cli->col; node->P[i][j] = param_val; } } for (i=0; i<bntree->num_states; i++) { row_sum = 0; for (j=0; j<bntree->num_states; j++) { if (j != i) { row_sum += node->P[i][j]; } } node->P[i][i] = 1 - row_sum; } } /* run EM algorithm */ /* We'll output the current tree after every iteration */ iteration = 0; converged = 0; ll = calc_log_likelihood (bntree, tuple_arr, count_arr, num_tuples, 0); fprintf (stderr, "Initial log likelihood: %f\n", ll); while (! converged) { iteration++; /* improve likelihood */ Estep(bntree, tuple_arr, count_arr, num_tuples); Mstep(bntree); /* output new model */ out_fp = fopen (output_filename, "w"); print_tree (out_fp, bntree); fclose (out_fp); /* check for convergence */ last_ll = ll; ll = calc_log_likelihood (bntree, tuple_arr, count_arr, num_tuples, 0); fprintf (stderr, "EM iteration %d, log likelihood: %f\n", iteration, ll); if ( (last_ll - ll) / last_ll < EM_TOL || iteration >= MAX_EM_ITERS) converged = 1; } return bntree; }
/**
 * CEM initialisation: run CEM from several random starts and keep the
 * parameter of the attempt with the highest completed log-likelihood.
 *
 * Each of the strategyInit->getNbTry() attempts resets the parameter, calls
 * initRANDOM(1) and repeats Estep/Cstep/Mstep until the completed
 * log-likelihood is exactly equal on two consecutive iterations, or the
 * iteration counter exceeds maxNbIterationInCEM_INIT.  An attempt that throws
 * an XEMErrorType is discarded and does not count as a successful run.
 *
 * On return, and also when CEM_INIT_error is thrown, _parameter is the object
 * obtained from _parameter->clone() at entry, updated with the best attempt
 * found (unchanged from the entry state if no attempt succeeded).
 *
 * @param strategyInit supplies the number of attempts through getNbTry()
 * @throws CEM_INIT_error if every attempt failed
 */
void XEMModel::initCEM_INIT(XEMStrategyInit * strategyInit)
{
    _algoName = CEM;

    XEMParameter * bestParameter = _parameter->clone();
    double bestCLogLikelihood = 0.0;
    double previousCLogLikelihood = 0.0;
    int64_t nbRunOfCEMOk = 0;

    for (int64_t attempt = 0; attempt < strategyInit->getNbTry(); attempt++) {
        // Counted optimistically; undone in the handler if the attempt fails.
        nbRunOfCEMOk++;
        try {
            _parameter->reset();   // back to default values
            initRANDOM(1);
            _algoName = CEM;

            int64_t nbIter = 0;
            bool stabilised = false;
            while (!stabilised && nbIter <= maxNbIterationInCEM_INIT) {
                Estep();
                Cstep();
                Mstep();
                nbIter++;

                const double current = getCompletedLogLikelihood();
                // From the second iteration on, stop as soon as the criterion
                // no longer changes at all.
                if ((nbIter > 1) && (current == previousCLogLikelihood)) {
                    stabilised = true;
                }
                else {
                    previousCLogLikelihood = current;
                }
            }

            // Score of this attempt; the first successful attempt is always
            // taken as the current best.
            const double cLogLikelihood = getCompletedLogLikelihood();
            if ((nbRunOfCEMOk == 1) || (cLogLikelihood > bestCLogLikelihood)) {
                bestCLogLikelihood = cLogLikelihood;
                bestParameter->recopy(_parameter);
            }
        }
        catch (XEMErrorType) {
            nbRunOfCEMOk--;
        }
    }

    // Install the best parameter on both the success and the failure path.
    delete _parameter;
    _parameter = bestParameter;
    _parameter->setModel(this);

    if (nbRunOfCEMOk == 0) {
        throw CEM_INIT_error;
    }
}