// Example #1
// 0
// Fit a 3-state jahmm model to 'ChIP' and decode it.
//
// The first profile of 'ChIP->y' (the sum of negative controls) is
// passed to 'mle_zinb()' to obtain starting parameters. A model is
// created with 'new_jahmm()', fitted with 'bw_zinm()', its states are
// put in order of decreasing p0, and 'block_viterbi()' fills the path
// that is stored in 'jahmm->path'.
//
// RETURN: the fitted model, or NULL on failure (a message is printed
// on stderr).
jahmm_t *
do_jahmm
(
   ChIP_t *ChIP
)
{

   // Jahmm uses 3 states.
   const unsigned int m = 3;

   // Extract the dimensions of the observations.
   const unsigned int r = ChIP->r;
   const unsigned int n = nobs(ChIP);

   // Every buffer owned by this function is declared up front so
   // that the single exit point can release whatever was allocated
   // ('free(NULL)' is a no-op).
   int        *ctrl   = NULL; // First ChIP profile.
   zinb_par_t *z      = NULL; // Estimates returned by 'mle_zinb()'.
   double     *init_Q = NULL; // Starting transition values.
   double     *init_p = NULL; // Starting emission values.
   double     *Q_     = NULL; // Scratch copies used to reorder states.
   double     *p_     = NULL;
   double     *phi_   = NULL;
   double     *pem_   = NULL;
   int        *path   = NULL; // Viterbi path (handed to 'jahmm').
   double     *initp  = NULL;
   double     *log_Q  = NULL;
   double     *Q      = NULL; // Aliases of 'jahmm->Q' and 'jahmm->p'.
   double     *p      = NULL;
   jahmm_t    *jahmm  = NULL;
   jahmm_t    *retval = NULL;

   // 'order[k]' is the state that ends up at position k,
   // 'map[j]' is the new position of state j.
   size_t order[3] = {0,1,2};
   size_t map[3]   = {0,1,2};

   // Extract the first ChIP profile, which is the sum of
   // negative controls.
   ctrl = malloc(n * sizeof *ctrl);
   if (ctrl == NULL) {
      fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }

   for (size_t i = 0 ; i < n ; i++) {
      ctrl[i] = ChIP->y[0+i*r];
   }

   z = mle_zinb(ctrl, n);
   if (z == NULL) {
      fprintf(stderr, "jahmm failure %s:%d\n", __FILE__, __LINE__);
      apologize();
      goto clean_and_return;
   }

   free(ctrl);
   ctrl = NULL;

   init_Q = malloc(m*m * sizeof *init_Q);
   init_p = malloc(m*(r+1) * sizeof *init_p);
   if (init_Q == NULL || init_p == NULL) {
      fprintf(stderr, "memory error: %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }

   // Set initial values of 'Q'.
   for (size_t i = 0 ; i < m ; i++) {
   for (size_t j = 0 ; j < m ; j++) {
      init_Q[i+j*m] = (i==j) ? .95 : .05/(m-1);
   }
   }

   // Set initial values of 'p'. They are not normalized, but the
   // call to 'bw_zinm' will normalize them.
   for (size_t i = 0 ; i < m ; i++) {
      init_p[0+i*(r+1)] = z->p;
      init_p[1+i*(r+1)] = 1 - z->p;
      for (size_t j = 2 ; j < r+1 ; j++) {
         init_p[j+i*(r+1)] = i + 0.5;
      }
   }

   jahmm = new_jahmm(m, ChIP);
   if (jahmm == NULL) {
      fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }
   set_jahmm_par(jahmm, init_Q, z->a, z->pi, init_p);

   // From here on we work with the arrays held by 'jahmm'.
   Q = jahmm->Q;
   p = jahmm->p;

   // Run the Baum-Welch algorithm.
   bw_zinm(jahmm);

   // Reorder the states in case they got scrambled.
   // We use the value of p0 as a sort key (largest p0 first).
   // The insertion sort works on state indices so that every
   // comparison sees the states currently at those positions,
   // and the result is always a permutation.
   for (size_t k = 1 ; k < m ; k++) {
      for (size_t l = k ; l > 0 ; l--) {
         if (!(p[order[l-1]*(r+1)] < p[order[l]*(r+1)])) break;
         size_t tmp = order[l-1];
         order[l-1] = order[l];
         order[l] = tmp;
      }
   }
   for (size_t k = 0 ; k < m ; k++) map[order[k]] = k;

   if (map[0] != 0 || map[1] != 1) {
      // The states have been scrambled. We need to reorder
      // 'Q', 'p', 'phi' and 'pem'.

      double *phi = jahmm->phi;
      double *pem = jahmm->pem;

      // Widen to 'size_t' before multiplying by 'n'.
      const size_t mn = (size_t) m * n;

      Q_ = malloc(m*m * sizeof *Q_);
      p_ = malloc(m*(r+1) * sizeof *p_);
      phi_ = malloc(mn * sizeof *phi_);
      pem_ = malloc(mn * sizeof *pem_);
      if (Q_ == NULL || p_ == NULL || phi_ == NULL || pem_ == NULL) {
         fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
         goto clean_and_return;
      }

      for (size_t j = 0 ; j < m ; j++) {
      for (size_t i = 0 ; i < m ; i++) {
         Q_[map[i]+map[j]*m] = Q[i+j*m];
      }
      }
      memcpy(jahmm->Q, Q_, m*m * sizeof(double));

      for (size_t j = 0 ; j < m ; j++) {
      for (size_t i = 0 ; i < r+1 ; i++) {
         p_[i+map[j]*(r+1)] = p[i+j*(r+1)];
      }
      }
      memcpy(jahmm->p, p_, m*(r+1) * sizeof(double));

      for (size_t j = 0 ; j < m ; j++) {
      for (size_t i = 0 ; i < n ; i++) {
         phi_[map[j]+i*m] = phi[j+i*m];
         pem_[map[j]+i*m] = pem[j+i*m];
      }
      }
      memcpy(jahmm->phi, phi_, mn * sizeof(double));
      memcpy(jahmm->pem, pem_, mn * sizeof(double));

   }

   // Run the Viterbi algorithm.
   path = malloc(n * sizeof *path);
   initp = malloc(m * sizeof *initp);
   log_Q = malloc(m*m * sizeof *log_Q);
   if (path == NULL || initp == NULL || log_Q == NULL) {
      fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }

   for (size_t i = 0 ; i < m ; i++) initp[i] = log(1.0/m);
   for (size_t i = 0 ; i < m*m ; i++) log_Q[i] = log(Q[i]);

   block_viterbi(m, ChIP->nb, ChIP->size, log_Q, initp, jahmm->pem, path);

   // 'jahmm' now owns 'path', so it must not be freed below.
   jahmm->path = path;
   path = NULL;

   retval = jahmm;

clean_and_return:
   // NOTE(review): when a failure happens after 'new_jahmm()'
   // succeeded, 'jahmm' itself is not released because no
   // destructor is visible from this function -- TODO confirm
   // and call it here.
   // NOTE(review): assumes the value returned by 'mle_zinb()' is
   // a single heap block that can be released with 'free()' --
   // TODO confirm.
   free(ctrl);
   free(z);
   free(init_Q);
   free(init_p);
   free(Q_);
   free(p_);
   free(phi_);
   free(pem_);
   free(path);
   free(initp);
   free(log_Q);

   return retval;

}
// Example #2
// 0
// Fit a 3-state Zerone model to 'ChIP' and decode it.
//
// The first profile of 'ChIP->y' (the sum of mock controls) is passed
// to 'mle_zinb()' to obtain starting parameters. A model is created
// with 'new_zerone()', fitted with 'bw_zinm()', its states are put in
// order of decreasing p0 with 'reorder()', and 'block_viterbi()' fills
// the path that is stored in 'Z->path'.
//
// RETURN: the Zerone instance, or NULL if the failure happened before
// the instance was created (a message is printed on stderr).
zerone_t *
do_zerone
(
   ChIP_t * ChIP
)
{

   // The number of state in Zerone is an important constant.
   // So much depends on it that it may be frozen in the code.
   const unsigned int m = 3;

   int        * mock = NULL; // The mock ChIP profile.
   int        * path = NULL; // The Viterbi path.
   zinb_par_t * par  = NULL; // The parameter estimates.
   zerone_t   * Z    = NULL; // The Zerone instance.

   // Fixed-size work arrays. They are declared before the first
   // 'goto' so that no jump skips an initialization.
   double Q[9] = {0};       // Starting transition values.
   double p[3*64] = {0};    // Starting emission values (3 rows of r+1).
   double log_Q[9] = {0};   // Log transitions for Viterbi.
   double initp[3] = {0};   // Log initial probabilities for Viterbi.
   int map[3] = {0,1,2};    // 'map[k]' is the state placed at rank k.

   // Extract the dimensions of the observations.
   const unsigned int r = ChIP->r;
   const unsigned int n = nobs(ChIP);

   // 'p' has 64 slots per state and each state uses r+1 of them.
   if (r > 63) {
      fprintf(stderr, "maximum number of profiles exceeded\n");
      goto clean_and_return;
   }

   // Extract the first ChIP profile (the sum of mock controls).
   mock = malloc(n * sizeof *mock);
   if (mock == NULL) {
      fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }

   // Copy data to 'mock'.
   for (size_t i = 0 ; i < n ; i++) {
      mock[i] = ChIP->y[0+i*r];
   }

   par = mle_zinb(mock, n);
   if (par == NULL) {
// TODO: Change parametrization so that failure does not happen. //
      fprintf(stderr, "zerone failure %s:%d\n", __FILE__, __LINE__);
      apologize();
      goto clean_and_return;
   }

   free(mock);
   mock = NULL;

   // Set initial values of 'Q'.
   for (size_t i = 0 ; i < 3 ; i++) {
   for (size_t j = 0 ; j < 3 ; j++) {
      Q[i+j*3] = (i==j) ? .95 : .025;
   }
   }

   // Set initial values of 'p'. They are not normalized,
   // but the call to 'bw_zinm' will normalize them.
   for (size_t i = 0 ; i < 3 ; i++) {
      p[0+i*(r+1)] = par->p;
      p[1+i*(r+1)] = 1 - par->p;
      for (size_t j = 2 ; j < r+1 ; j++) {
         p[j+i*(r+1)] = i + 0.5;
      }
   }

   Z = new_zerone(3, ChIP);

   if (Z == NULL) {
      // TODO: handle this case properly. //
      fprintf(stderr, "error in function '%s()' %s:%d\n",
            __func__, __FILE__, __LINE__);
      goto clean_and_return;
   }

   set_zerone_par(Z, Q, par->a, par->pi, p);

   // Run the Baum-Welch algorithm.
   bw_zinm(Z);

   // Reorder the states in case they got scrambled.
   // We use the value of p0 as a sort key (high p0
   // means low average signal and vice versa).
   // After the loop 'map[0]' is the state with the largest p0
   // and 'map[2]' the state with the smallest p0.
   for (int i = 1 ; i < 3 ; i++) {
      if (Z->p[i*(r+1)] > Z->p[map[0]*(r+1)]) map[0] = i;
      if (Z->p[(i-1)*(r+1)] < Z->p[map[2]*(r+1)]) map[2] = i-1;
   }
   // The middle state is the remaining one (indices sum to 3).
   map[1] = 3-map[0]-map[2];
   debug_print("map: [%d, %d, %d]\n", map[0], map[1], map[2]);

   if (map[0] != 0 || map[1] != 1 || map[2] != 2) reorder(Z, map);

   // Run the Viterbi algorithm.
   path = malloc(n * sizeof *path);
   if (path == NULL) {
      // NOTE(review): 'Z' is still returned on this path, without
      // a Viterbi path assigned here -- TODO decide whether the
      // instance should be destroyed and NULL returned instead.
      fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }

   for (size_t i = 0 ; i < 3 ; i++) initp[i] = log(1.0/3);
   for (size_t i = 0 ; i < 9 ; i++) log_Q[i] = log(Z->Q[i]);

   // Find Viterbi path.
   block_viterbi(m, ChIP->nb, ChIP->sz, log_Q, initp, Z->pem, path);

   // 'Z' now owns 'path'; it is not freed below.
   Z->path = path;

clean_and_return:
   // Release the temporaries ('free(NULL)' is a no-op).
   // NOTE(review): assumes the value returned by 'mle_zinb()' is
   // a single heap block that can be released with 'free()' --
   // TODO confirm.
   free(mock);
   free(par);
   return Z;

}