Exemplo n.º 1
0
// Runs `nbiter` further rounds of the estimation loop on the given data and
// model: each round calls Mstep() followed by Probapost(), after one initial
// Probapost() call. Likelihood() is evaluated once at the end.
// NOTE(review): Probapost presumably refreshes posterior probabilities (an
// E-step) -- confirm against its definition.
void parameters::Estimation_continue(int nbiter,datafile dat, model mod){
	// Bring the Probapost() state up to date with the current parameters
	// before the first Mstep().
	Probapost(mod, dat.Get_mat_datafile());

	// Count down the requested number of rounds; nothing runs when nbiter <= 0.
	for (int remaining = nbiter; remaining > 0; --remaining) {
		Mstep(dat, mod);
		Probapost(mod, dat.Get_mat_datafile());
	}

	// Final evaluation on the values returned by Get_eff_datafile().
	Likelihood(dat.Get_eff_datafile());
}
Exemplo n.º 2
0
// Performs one randomly-initialised short EM run.
//
// After initRANDOM(1) and a first Estep()/Mstep() pair, further Estep()/Mstep()
// pairs are executed until the stop rule selected by
// clusteringStrategyInit->getStopName() says to stop:
//   - NBITERATION         : stop once the iteration count reaches getNbIteration();
//   - EPSILON             : stop once the absolute log-likelihood change is
//                           <= getEpsilon(), or maxNbIterationInInit is reached;
//   - NBITERATION_EPSILON : stop once either the change is <= getEpsilon() or
//                           the iteration count reaches getNbIteration().
// Any other stop name throws internalMixmodError.
//
// logLikelihood is an output: on return it holds getLogLikelihood(true) for
// the final state (for the two epsilon-based rules, the value from the last
// convergence check).
void XEMModel::oneRunOfSmallEM(XEMClusteringStrategyInit * clusteringStrategyInit, double & logLikelihood) {
    initRANDOM(1);
    Estep();
    Mstep();
    logLikelihood = getLogLikelihood(true);  // true : to compute fik

    // The initial Estep()/Mstep() pair above counts as iteration 1.
    int64_t iterationCount = 1;
    bool keepGoing = true;

    // At least one more Estep()/Mstep() pair is always executed.
    do {
        const double previousLogLikelihood = logLikelihood;
        Estep();
        Mstep();
        ++iterationCount;

        switch (clusteringStrategyInit->getStopName()) {
        case NBITERATION : {
            // The log-likelihood is not needed to decide here; it is computed
            // once after the loop instead.
            keepGoing = (iterationCount < clusteringStrategyInit->getNbIteration());
            break;
        }
        case EPSILON : {
            logLikelihood = getLogLikelihood(true);  // true : to compute fik
            const double delta = fabs(logLikelihood - previousLogLikelihood);
            // An extra cap on the iteration count is applied so that the run
            // cannot go on for too long.
            keepGoing = (delta > clusteringStrategyInit->getEpsilon()) && (iterationCount < maxNbIterationInInit);
            break;
        }
        case NBITERATION_EPSILON : {
            logLikelihood = getLogLikelihood(true);  // true : to compute fik
            const double delta = fabs(logLikelihood - previousLogLikelihood);
            keepGoing = (delta > clusteringStrategyInit->getEpsilon()) && (iterationCount < clusteringStrategyInit->getNbIteration());
            break;
        }
        default :
            throw internalMixmodError;
        }
    } while (keepGoing);

    // With the pure iteration-count rule the output has not been refreshed
    // inside the loop, so compute it now.
    if (clusteringStrategyInit->getStopName() == NBITERATION) {
        logLikelihood = getLogLikelihood(true);  // true : to compute fik
    }
}
Exemplo n.º 3
0
// Runs EM iterations (Estep() then Mstep()) until either iterCurrent
// iterations have been done or the log-likelihood gain of the last iteration
// is no larger than p_results->p_strategy->m_tolKeep.
void XEM::OneEM(){
  double current = ComputeLogLike();
  // log(0) yields -infinity, so the first gain test always passes.
  double previous = log(0);

  for (int done = 0;
       (done < iterCurrent) && ((current - previous) > p_results->p_strategy->m_tolKeep);
       ++done) {
    Estep();
    Mstep();
    previous = current;
    current = ComputeLogLike();
  }

  // Sanity check: report when the last iteration lowered the log-likelihood
  // by more than the tolerance.
  if (previous > (current + p_results->p_strategy->m_tolKeep)) {
    cout << "pb EM " << "prec" << previous << " loglike " << current << endl;
  }
}
Exemplo n.º 4
0
// SEM_MAX initialisation.
//
// Makes strategyInit->getNbIteration() independent attempts. Each attempt
// resets _parameter, calls initRANDOM(1), then Estep(), Sstep() and Mstep(),
// and evaluates getLogLikelihood(true). A copy of the parameters of the
// attempt with the highest log-likelihood is kept and installed as _parameter
// at the end.
//
// An attempt that throws XEMErrorType is discarded. If every attempt fails,
// SEM_MAX_error is thrown; in that case _parameter is left as the failed
// attempts left it and the temporary best-parameter copy is released.
void XEMModel::initSEM_MAX(XEMStrategyInit * strategyInit) {
    _algoName = SEM;
    int64_t  j;
    double logLikelihood, bestLogLikelihood;
    XEMParameter * bestParameter = _parameter->clone();
    int64_t  nbRunOfSEMMAXOk = 0;
    bestLogLikelihood = 0.0;

    for (j=0; j<strategyInit->getNbIteration(); j++) {
        // Optimistically count the attempt; the catch block undoes this when
        // the attempt fails.
        nbRunOfSEMMAXOk++;
        try {
            _parameter->reset();
            initRANDOM(1);
            Estep();
            Sstep();
            Mstep();
            // Compute log-likelihood
            logLikelihood = getLogLikelihood(true);  // true : to compute fik

            // The first successful attempt always becomes the reference;
            // bestLogLikelihood has no meaningful value before that.
            if ((nbRunOfSEMMAXOk==1) || (logLikelihood > bestLogLikelihood)) {
                bestLogLikelihood = logLikelihood;
                bestParameter->recopy(_parameter);
            }
        }
        catch (XEMErrorType) {
            nbRunOfSEMMAXOk--;
        }
    }

    if (nbRunOfSEMMAXOk==0) {
        // No attempt succeeded: free the clone before leaving, otherwise it
        // would be leaked by the throw.
        delete bestParameter;
        throw SEM_MAX_error;
    }

    // set best parameter
    delete _parameter;
    _parameter = bestParameter;
    _parameter->setModel(this);
}
Exemplo n.º 5
0
/*
 * Train a BNTree with EM from a sufficient-statistics file.
 *
 * treestr         - tree description handed to parse_tree_string()
 * modelstr        - model name: one of TT0/TT1/TT2, R0/R1/R2, G0/G1/G2
 *                   (only R0, R1, G0, G1 and G2 get past parameter
 *                   initialisation; the others exit as "not yet supported")
 * ss_filename     - sufficient-statistics file; scanned here for the
 *                   "NTUPLES = <n>" entry and then passed to parse_ss()
 * model_name      - copied into bntree->name
 *                   NOTE(review): copied with strcpy; the capacity of
 *                   bntree->name is not visible here -- confirm it is large
 *                   enough for every caller.
 * output_filename - file rewritten with print_tree() after every EM iteration
 *
 * Returns the trained tree. On an unknown/unsupported model, an unreadable
 * statistics file, a negative tuple count, or an unwritable output file, an
 * error is printed to stderr and the process exits with status -1.
 *
 * NOTE(review): tuple_arr, count_arr and background_probs are allocated here
 * and never freed; whether parse_ss() or the tree retains them is not visible
 * from this function, so they are left alone.
 */
BNTree* train_bntree (char *treestr, char *modelstr, char *ss_filename, char *model_name, char *output_filename) {
  FILE *ss_fp, *out_fp;
  int model_type;
  double *background_probs;
  char **tuple_arr;
  double *count_arr;
  int num_tuples = 0;
  double ll;
  double last_ll;
  int iteration;
  int converged;
  char tag_str[STRLEN];
  char val_str[STRLEN];
  char scan_fmt[64];
  int u;
  int i, j, k;
  Node *node;
  int param_num;
  double row_sum;
  double param_val;
  CellListItem *cli;

  BNTree *bntree = NewBNTree(0); /* no cache for training (for now) */

  strcpy (bntree->name, model_name);

  if (strcmp (modelstr, "TT0") == 0) {
    model_type = TT0;
    bntree->order = 0;
    bntree->num_states = 6;
  }
  else if (strcmp (modelstr, "TT1") == 0) {
    model_type = TT1;
    bntree->order = 1;
    bntree->num_states = 36;
  }
  else if (strcmp (modelstr, "TT2") == 0) {
    model_type = TT2;
    bntree->order = 2;
    bntree->num_states = 216;
  }
  else if (strcmp (modelstr, "R0") == 0) {
    model_type = R0;
    bntree->order = 0;
    bntree->num_states = 6;
    bntree->num_params = 18;
  }
  else if (strcmp (modelstr, "R1") == 0) {
    model_type = R1;
    bntree->order = 1;
    bntree->num_states = 36;
    bntree->num_params = 236;
  }
  else if (strcmp (modelstr, "R2") == 0) {
    model_type = R2;
    bntree->order = 2;
    bntree->num_states = 216;
  }
  else if (strcmp (modelstr, "G0") == 0) {
    model_type = G0;
    bntree->order = 0;
    bntree->num_states = 6;
    bntree->num_params = 36;
  }
  else if (strcmp (modelstr, "G1") == 0) {
    model_type = G1;
    bntree->order = 1;
    bntree->num_states = 36;
    bntree->num_params = 1296;
  }
  else if (strcmp (modelstr, "G2") == 0) {
    model_type = G2;
    bntree->order = 2;
    bntree->num_states = 216;
    bntree->num_params = 46656;
  }
  else {
    fprintf (stderr, "Unknown probability model type: %s\n", modelstr);
    exit(-1);
  }
    
  /* initialize tree structure */
  parse_tree_string (bntree, treestr);

  /* parse the sufficient statistics file */
  ss_fp = fopen (ss_filename, "r");
  if (ss_fp == NULL) {
    fprintf (stderr, "Could not open sufficient statistics file: %s\n", ss_filename);
    exit (-1);
  }
  /* Build "%<w>s = %<w>s\n" with w = STRLEN-1 so that an over-long token in
     the file cannot overflow tag_str / val_str. */
  snprintf (scan_fmt, sizeof (scan_fmt), "%%%ds = %%%ds\n",
            (int)(STRLEN - 1), (int)(STRLEN - 1));
  while (fscanf (ss_fp, scan_fmt, tag_str, val_str) == 2) {
    if (strcmp (tag_str, "NTUPLES") == 0)
      num_tuples = atoi(val_str);
  }
  fclose (ss_fp);

  /* A negative count would be converted to a huge size in the allocations
     below. */
  if (num_tuples < 0) {
    fprintf (stderr, "Invalid NTUPLES value in %s: %d\n", ss_filename, num_tuples);
    exit (-1);
  }

  tuple_arr = (char **) malloc (num_tuples * sizeof (char *));
  for (i=0; i<num_tuples; i++)
    tuple_arr[i] = (char *) malloc (STRLEN * sizeof (char));

  count_arr = (double *) malloc (num_tuples * sizeof (double));
  background_probs = (double *) malloc (bntree->num_states * sizeof (double));

  parse_ss (bntree, ss_filename, tuple_arr, count_arr, background_probs);


  /* initialize parameters */
  bntree->param_map = (int **)malloc(bntree->num_states * sizeof(int *));
  for (i=0; i<bntree->num_states; i++)
    bntree->param_map[i] = (int *)malloc(bntree->num_states * sizeof(int));

  bntree->weight_idx = (int **)malloc(bntree->num_states * sizeof(int *));
  for (i=0; i<bntree->num_states; i++)
    bntree->weight_idx[i] = (int *)malloc(bntree->num_states * sizeof(int));

  if (model_type == R0)
    init_params_R0 (bntree);
  else if (model_type == G0)
    init_params_G0 (bntree);

  else if (model_type == R1)
    init_params_R1 (bntree);
  else if (model_type == G1)
    init_params_G1 (bntree);

  else if (model_type == G2)
    init_params_G2 (bntree);
  else {
    fprintf (stderr, "Model type %s not yet supported\n", modelstr);
    exit (-1);
  }


  /* create inverse parameter map: for each parameter index, the list of
     (row, col) cells of the state matrix that share it */
  bntree->inv_param_map = (CellListItem **)malloc(bntree->num_params * sizeof (CellListItem *));
  for (i=0; i<bntree->num_params; i++)
    bntree->inv_param_map[i] = NewCellList();
  
  for (i=0; i<bntree->num_states; i++) {
    for (j=0; j<bntree->num_states; j++) {
      param_num = bntree->param_map[i][j];
      cell_list_append(bntree->inv_param_map[param_num], i, j);
    }
  }


  /* allocate local CPDs and expectations (every node except the root) */
  for (u=0; u<bntree->num_nodes; u++) {
    node = bntree->preorder[u];
    if (node->parent == NULL) continue;
    
    node->E = (double **)malloc(bntree->num_states * sizeof(double *));
    for (i=0; i<bntree->num_states; i++)
      node->E[i] = (double *)malloc(bntree->num_states * sizeof(double));

    node->P = (float **)malloc(bntree->num_states * sizeof(float *));
    for (i=0; i<bntree->num_states; i++)
      node->P[i] = (float *)malloc(bntree->num_states * sizeof(float));

    node->W = (double **)malloc(bntree->num_states * sizeof(double *));
    for (i=0; i<bntree->num_states; i++)
      node->W[i] = (double *)malloc(bntree->num_states * sizeof(double));
  }

  /* randomly initialize the parameters in a way that guarantees all 
     probabilities will be within 0 and 1, and all rows sum to 1 */
  /* also, self substitution probs will be at least 0.5 */
  /* Seed once, before the loop: re-seeding with time(NULL) for every node
     restarts rand() from the same state whenever the calls fall within the
     same second.
     NOTE(review): this matters only if bounded_random() draws from rand() --
     confirm. */
  srand(time(NULL));
  for (u=0; u<bntree->num_nodes; u++) {
    node = bntree->preorder[u];
    if (node->parent == NULL) continue;

    /* off-diagonal parameters: the last num_states parameter indices are
       skipped here and the diagonal is filled in from the row sums below */
    for (k=0; k<bntree->num_params - bntree->num_states; k++) {
      param_val = bounded_random (0.05, 0.50) / (bntree->num_states - 1);
      for (cli = bntree->inv_param_map[k]; cli != NULL; cli = cli->next) {
        i = cli->row;
        j = cli->col;
        node->P[i][j] = param_val;
      }
    }
    for (i=0; i<bntree->num_states; i++) {
      row_sum = 0;
      for (j=0; j<bntree->num_states; j++) {
        if (j != i) {
          row_sum += node->P[i][j];
        }
      }
      node->P[i][i] = 1 - row_sum;
    }
  }

  /* run EM algorithm */
  /* We'll output the current tree after every iteration */
  iteration = 0;
  converged = 0;
  ll = calc_log_likelihood (bntree, tuple_arr, count_arr, num_tuples, 0);
  fprintf (stderr, "Initial log likelihood: %f\n", ll);
  while (! converged) {
    iteration++;

    /* improve likelihood */
    Estep(bntree, tuple_arr, count_arr, num_tuples);
    Mstep(bntree);

    /* output new model */
    out_fp = fopen (output_filename, "w");
    if (out_fp == NULL) {
      fprintf (stderr, "Could not open output file for writing: %s\n", output_filename);
      exit (-1);
    }
    print_tree (out_fp, bntree);
    fclose (out_fp);

    /* check for convergence: stop when the relative change in log likelihood
       drops below EM_TOL, or after MAX_EM_ITERS iterations */
    last_ll = ll;
    ll = calc_log_likelihood (bntree, tuple_arr, count_arr, num_tuples, 0);
    fprintf (stderr, "EM iteration %d, log likelihood: %f\n", iteration, ll);
    if ( (last_ll - ll) / last_ll < EM_TOL || iteration >= MAX_EM_ITERS)
      converged = 1;
  }
  
  return bntree;
}
Exemplo n.º 6
0
// CEM initialisation.
//
// Makes strategyInit->getNbTry() attempts. Each attempt resets _parameter,
// calls initRANDOM(1) and then repeats Estep()/Cstep()/Mstep() until the
// completed log-likelihood is exactly unchanged between two consecutive
// iterations, or the iteration count exceeds maxNbIterationInCEM_INIT.
// A copy of the parameters of the attempt with the highest completed
// log-likelihood is kept.
//
// In every case _parameter is replaced by that copy before returning (when no
// attempt succeeded the copy is still the clone taken on entry). Attempts
// that throw XEMErrorType are discarded; if all of them fail, CEM_INIT_error
// is thrown after the replacement.
void XEMModel::initCEM_INIT(XEMStrategyInit * strategyInit) {
    _algoName = CEM;

    XEMParameter * bestSoFar = _parameter->clone();
    double bestScore = 0.0;
    int64_t successfulTries = 0;

    for (int64_t attempt = 0; attempt < strategyInit->getNbTry(); ++attempt) {
        try {
            _parameter->reset(); // reset to default values
            initRANDOM(1);
            _algoName = CEM;

            double previousScore = 0.0;
            int64_t iterCount = 0;
            bool stable = false;
            while (!stable && iterCount <= maxNbIterationInCEM_INIT) {
                Estep();
                Cstep();
                Mstep();
                ++iterCount;

                const double score = getCompletedLogLikelihood();
                // From the second iteration on, an exactly identical score
                // ends the attempt; otherwise remember it for the next round.
                if (iterCount > 1 && score == previousScore) {
                    stable = true;
                }
                else {
                    previousScore = score;
                }
            }

            // Score of the finished attempt, compared against the best one
            // seen so far (the first success is always accepted).
            const double finalScore = getCompletedLogLikelihood();
            if (successfulTries == 0 || finalScore > bestScore) {
                bestScore = finalScore;
                bestSoFar->recopy(_parameter);
            }

            // Only attempts that reach this point count as successful.
            ++successfulTries;
        }
        catch (XEMErrorType) {
            // A failed attempt is simply not counted.
        }
    }

    // Install the retained parameters, whether or not any attempt succeeded.
    delete _parameter;
    _parameter = bestSoFar;
    _parameter->setModel(this);

    if (successfulTries == 0) {
        throw CEM_INIT_error;
    }
}