Пример #1
0
/*
  This routine clusters a dataset into an optimal number of groups using the 
  E-M algorithm and a choice of AIC or BIC and with start points provided by 
  the shortems function.  
  The function returns    the classification ids as well as the parameter 
  estimates. The parameter estimates are created in the routine and should not
  be allocated before the function call. This also means that the parameter 
  estimates have to be cleaned after the function call. (NOTE: This is an 
  important fact to remember. */
int em_EM(double **x,int n,int p,int nclass,double *pi,double **Mu,
	  double **LTSigma,double *llhdval,int *nc,int shortiter,
	  double shorteps)
{
  int flag=0;
  double like;

  if (nclass==1) {
/* These formulae are not correct with two errors,
   1. n-1 should be replace by n in meandispersion() for LTSigma, and
   2. determinant() will change the values of LTSigma[0], see "initials.c".
   Modified: Wei-Chen Chen on 2008/12/05.

    meandispersion(x,n,p,Mu[0],LTSigma[0]);
*/
    meandispersion_MLE(x,n,p,Mu[0],LTSigma[0]);
    like=-0.5*n*p-0.5*n*log(determinant(LTSigma[0],p))-0.5*n*p*log(2*PI);
    (*llhdval)=like;
  }
  else {
    shortems(n,p,nclass,pi,x,Mu,LTSigma,shortiter,shorteps);
    emcluster(n,p,nclass,pi,x,Mu,LTSigma,1000,0.0001,&like);
    (*llhdval)=like;
  } 
  return flag;
}
Пример #2
0
/* This is the model-based em.EM. */
int mb_em_EM(double **x, int n, int p, int nclass, double *pi, double **Mu,
    double **LTSigma, double *llhdval, int shortiter, double shorteps){
  if(nclass == 1){
    meandispersion_MLE(x, n, p, Mu[0], LTSigma[0]);
    *llhdval = -0.5 * n * p - 0.5 * n * log(determinant(LTSigma[0], p)) -
               0.5 * n * p * log(2 * PI);
  }
  else {
    shortems_mb(n, p, nclass, pi, x, Mu, LTSigma, shortiter, shorteps);
    emcluster(n, p, nclass, pi, x, Mu, LTSigma, 1000, 0.0001, llhdval);
  } 

  return 0;
} /* END of mb_em_EM(). */
Пример #3
0
/* This function calls emcluster() in "src/emcluster.c" and is called by
   emcluster() using .Call() in "R/fcn_emcluster.r".
   Input:
     R_x: SEXP[R_n * R_p], data matrix of R_n*R_p.
     R_n: SEXP[1], number of observations.
     R_p: SEXP[1], number of dimersions.
     R_nclass: SEXP[1], number of classes.		# k
     R_p_LTSigma: SEXP[1], dimersion of LTSigma, p * (p + 1) / 2.
     R_pi: SEXP[R_nclass], proportions of classes.
     R_Mu: SEXP[R_nclass, R_p], means of MVNs.
     R_LTSigma: SEXP[R_nclass, R_p * (R_p + 1) / 2], lower triangular sigma
                matrices.
     R_em_iter: SEXP[1], max iterations for emclust(), 1000 by default.
     R_em_eps: SEXP[1], tolerance for emclust(), 1e-4 by default.
   Output:
     ret: a list contains
       pi: SEXP[R_nclass], proportions of classes.
       Mu: SEXP[R_nclass, R_p], means of MVNs.
       LTSigma: SEXP[R_nclass, R_p * (R_p + 1) / 2], lower triangular sigma
                matrices.
       llhdval: SEXP[1], log likelihood value.
*/
SEXP R_emcluster(SEXP R_x, SEXP R_n, SEXP R_p, SEXP R_nclass, SEXP R_p_LTSigma,
    SEXP R_pi, SEXP R_Mu, SEXP R_LTSigma, SEXP R_em_iter, SEXP R_em_eps){
  /* Declare variables for calling C. */
  double **C_x, *C_pi, **C_Mu, **C_LTSigma, *C_llhdval, *C_em_eps;
  int *C_n, *C_p, *C_nclass, *C_p_LTSigma, *C_em_iter;

  /* Declare variables for R's returning. */
  SEXP pi, Mu, LTSigma, llhdval, ret, ret_names;

  /* Declare variables for processing. */
  double *tmp_1, *tmp_2;
  int i, j, tl;
  char *names[4] = {"pi", "Mu", "LTSigma", "llhdval"};

  /* Set initial values. */
  C_n = INTEGER(R_n);
  C_p = INTEGER(R_p);
  C_nclass = INTEGER(R_nclass);
  C_p_LTSigma = INTEGER(R_p_LTSigma);

  /* Allocate and protate storages. */
  PROTECT(pi = allocVector(REALSXP, *C_nclass));
  PROTECT(Mu = allocVector(REALSXP, *C_nclass * *C_p));
  PROTECT(LTSigma = allocVector(REALSXP, *C_nclass * *C_p_LTSigma));
  PROTECT(llhdval = allocVector(REALSXP, 1));
  PROTECT(ret = allocVector(VECSXP, 4));
  PROTECT(ret_names = allocVector(STRSXP, 4));

  i = 0;
  SET_VECTOR_ELT(ret, i++, pi);
  SET_VECTOR_ELT(ret, i++, Mu);
  SET_VECTOR_ELT(ret, i++, LTSigma);
  SET_VECTOR_ELT(ret, i++, llhdval);

  for(i = 0; i < 4; i++){
    SET_STRING_ELT(ret_names, i, mkChar(names[i])); 
  }
  setAttrib(ret, R_NamesSymbol, ret_names);

  /* Assign data. */
  C_x = allocate_double_array(*C_n);
  C_Mu = allocate_double_array(*C_nclass);
  C_LTSigma = allocate_double_array(*C_nclass);

  tmp_1 = REAL(R_x);
  for(i = 0; i < *C_n; i++){
    C_x[i] = tmp_1;
    tmp_1 += *C_p;
  }

  tmp_1 = REAL(Mu);
  tmp_2 = REAL(LTSigma);
  for(i = 0; i < *C_nclass; i++){
    C_Mu[i] = tmp_1;
    C_LTSigma[i] = tmp_2;
    tmp_1 += *C_p;
    tmp_2 += *C_p_LTSigma;
  }

  C_pi = REAL(pi);
  C_llhdval = REAL(llhdval);
  C_em_iter = INTEGER(R_em_iter);
  C_em_eps = REAL(R_em_eps);

  /* Copy R objects to input oebjects for C. */
  tmp_1 = REAL(R_pi);
  for(i = 0; i < *C_nclass; i++){
    C_pi[i] = *(tmp_1 + i);
  }
  tl = 0;
  tmp_1 = REAL(R_Mu);
  for(i = 0; i < *C_nclass; i++){
    for(j = 0; j < *C_p; j++){
      C_Mu[i][j] = *(tmp_1 + tl++);
    }
  }
  tl = 0;
  tmp_1 = REAL(R_LTSigma);
  for(i = 0; i < *C_nclass; i++){
    for(j = 0; j < *C_p_LTSigma; j++){
      C_LTSigma[i][j] = *(tmp_1 + tl++);
    }
  }

  /* Compute. */
  emcluster(*C_n, *C_p, *C_nclass, C_pi, C_x, C_Mu, C_LTSigma,
            *C_em_iter, *C_em_eps, C_llhdval);

  /* Free memory and release protectation. */
  free(C_x);
  free(C_Mu);
  free(C_LTSigma);
  UNPROTECT(6);

  return(ret);
} /* End of R_emcluster(). */