Exemplo n.º 1
0
void
reorder
(
   zerone_t * Z,
   int      * map
)
// Permute the three hidden states of 'Z' in place. After the call,
// state 'k' holds what was stored for state 'map[k]' before the
// call. The arrays 'Q', 'p', 'phi' and 'pem' of 'Z' are rewritten.
{

   enum { NSTATES = 3, MAXDIM = 64 };

   // The scratch array 'new_p' below holds NSTATES * MAXDIM values,
   // so the row length 'r+1' must not exceed MAXDIM. The caller is
   // expected to have checked this.
   const unsigned int r = Z->ChIP->r;
   const unsigned int n = nobs(Z->ChIP);
   const unsigned int rowlen = r + 1;

   // Transition matrix: apply the permutation to both indices.
   double new_Q[NSTATES * NSTATES] = {0};
   for (int col = 0 ; col < NSTATES ; col++) {
      for (int row = 0 ; row < NSTATES ; row++) {
         new_Q[row + col*NSTATES] = Z->Q[map[row] + map[col]*NSTATES];
      }
   }
   memcpy(Z->Q, new_Q, sizeof(new_Q));

   // Emission parameters: one row of 'r+1' values per state.
   double new_p[NSTATES * MAXDIM] = {0};
   for (int state = 0 ; state < NSTATES ; state++) {
      const unsigned int dst = state * rowlen;
      const unsigned int src = map[state] * rowlen;
      for (unsigned int k = 0 ; k < rowlen ; k++) {
         new_p[dst + k] = Z->p[src + k];
      }
   }
   memcpy(Z->p, new_p, NSTATES * rowlen * sizeof(double));

   // 'phi' and 'pem' store NSTATES consecutive values for each
   // observation: permute every triplet through a small buffer.
   double triplet[NSTATES] = {0};

   for (unsigned int obs = 0 ; obs < n ; obs++) {
      const unsigned int base = obs * NSTATES;
      triplet[0] = Z->phi[map[0] + base];
      triplet[1] = Z->phi[map[1] + base];
      triplet[2] = Z->phi[map[2] + base];
      memcpy(Z->phi + base, triplet, sizeof(triplet));
   }

   for (unsigned int obs = 0 ; obs < n ; obs++) {
      const unsigned int base = obs * NSTATES;
      triplet[0] = Z->pem[map[0] + base];
      triplet[1] = Z->pem[map[1] + base];
      triplet[2] = Z->pem[map[2] + base];
      memcpy(Z->pem + base, triplet, sizeof(triplet));
   }

}
Exemplo n.º 2
0
jahmm_t *
do_jahmm
(
   ChIP_t *ChIP
)
// Fit a 3-state model to 'ChIP' and compute its Viterbi path.
//
// The first profile of 'ChIP' is passed to 'mle_zinb()' to get the
// initial parameters, the model is fitted with 'bw_zinm()', the
// states are sorted by decreasing value of p0 and the path is
// computed with 'block_viterbi()'.
//
// Returns the fitted model with its 'path' member set, or NULL in
// case of failure (a message is then printed on stderr).
{

   // Jahmm uses 3 states.
   const unsigned int m = 3;

   // Extract the dimensions of the observations.
   const unsigned int r = ChIP->r;
   const unsigned int n = nobs(ChIP);

   // Everything that may need cleaning is declared here so that
   // 'clean_and_return' can be reached from any point.
   jahmm_t    * retval = NULL; // What the function returns.
   jahmm_t    * jahmm  = NULL; // The model.
   zinb_par_t * z      = NULL; // The initial estimates.
   int        * ctrl   = NULL; // The control profile.
   int        * path   = NULL; // The Viterbi path.
   double     * Q      = NULL; // Initial transitions.
   double     * p      = NULL; // Initial emissions.
   double     * Q_     = NULL; // Buffers used for reordering.
   double     * p_     = NULL;
   double     * phi_   = NULL;
   double     * pem_   = NULL;
   double     * initp  = NULL; // Viterbi inputs.
   double     * log_Q  = NULL;

   // Extract the first ChIP profile, which is the sum of
   // negative controls.
   ctrl = malloc(n * sizeof(int));
   if (ctrl == NULL) {
      fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }

   for (size_t i = 0 ; i < n ; i++) {
      ctrl[i] = ChIP->y[0+i*r];
   }

   z = mle_zinb(ctrl, n);
   if (z == NULL) {
      fprintf(stderr, "jahmm failure %s:%d\n", __FILE__, __LINE__);
      apologize();
      goto clean_and_return;
   }

   free(ctrl);
   ctrl = NULL;

   Q = malloc(m*m * sizeof(double));
   p = malloc(m*(r+1) * sizeof(double));
   if (Q == NULL || p == NULL) {
      fprintf(stderr, "memory error: %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }

   // Set initial values of 'Q'.
   for (size_t i = 0 ; i < m ; i++) {
   for (size_t j = 0 ; j < m ; j++) {
      Q[i+j*m] = (i==j) ? .95 : .05/(m-1);
   }
   }

   // Set initial values of 'p'. They are not normalized, but the
   // call to 'bw_zinm' will normalize them.
   for (size_t i = 0 ; i < m ; i++) {
      p[0+i*(r+1)] = z->p;
      p[1+i*(r+1)] = 1 - z->p;
      for (size_t j = 2 ; j < r+1 ; j++) {
         p[j+i*(r+1)] = i + 0.5;
      }
   }

   jahmm = new_jahmm(m, ChIP);
   if (jahmm == NULL) {
      fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }
   set_jahmm_par(jahmm, Q, z->a, z->pi, p);

   // The initial values are no longer used: from here 'Q' and 'p'
   // refer to the arrays of the model.
   free(Q);
   free(p);

   Q = jahmm->Q;
   p = jahmm->p;

   // Run the Baum-Welch algorithm.
   bw_zinm(jahmm);

   // Reorder the states in case they got scrambled.
   // We use the value of p0 as a sort key (decreasing order).
   // The keys are read through 'map' so that every comparison sees
   // the swaps already done. At the end 'map[k]' is the current
   // index of the state that must become state 'k'.
   int tmp, map[3] = {0,1,2};
   if (p[map[0]*(r+1)] < p[map[1]*(r+1)]) {
      tmp = map[0]; map[0] = map[1]; map[1] = tmp;
   }
   if (p[map[1]*(r+1)] < p[map[2]*(r+1)]) {
      tmp = map[1]; map[1] = map[2]; map[2] = tmp;
   }
   if (p[map[0]*(r+1)] < p[map[1]*(r+1)]) {
      tmp = map[0]; map[0] = map[1]; map[1] = tmp;
   }

   // 'map' is a permutation: if the first two items are in place
   // so is the third.
   if (map[0] != 0 || map[1] != 1) {
      // The states have been scrambled. We need to reorder
      // 'Q', 'p', 'phi' and 'pem'.

      double *phi = jahmm->phi;
      double *pem = jahmm->pem;

      Q_ = malloc(m*m * sizeof(double));
      p_ = malloc(m*(r+1) * sizeof(double));
      phi_ = malloc((size_t) m*n * sizeof(double));
      pem_ = malloc((size_t) m*n * sizeof(double));
      if (Q_ == NULL || p_ == NULL || phi_ == NULL || pem_ == NULL) {
         fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
         goto clean_and_return;
      }

      // New state 'k' takes the values of old state 'map[k]'.
      for (size_t j = 0 ; j < m ; j++) {
      for (size_t i = 0 ; i < m ; i++) {
         Q_[i+j*m] = Q[map[i]+map[j]*m];
      }
      }
      memcpy(jahmm->Q, Q_, m*m * sizeof(double));

      for (size_t j = 0 ; j < m ; j++) {
      for (size_t i = 0 ; i < r+1 ; i++) {
         p_[i+j*(r+1)] = p[i+map[j]*(r+1)];
      }
      }
      memcpy(jahmm->p, p_, m*(r+1) * sizeof(double));

      for (size_t j = 0 ; j < m ; j++) {
      for (size_t i = 0 ; i < n ; i++) {
         phi_[j+i*m] = phi[map[j]+i*m];
         pem_[j+i*m] = pem[map[j]+i*m];
      }
      }
      memcpy(jahmm->phi, phi_, (size_t) m*n * sizeof(double));
      memcpy(jahmm->pem, pem_, (size_t) m*n * sizeof(double));

   }

   // Run the Viterbi algorithm.
   path = malloc(n * sizeof(int));
   initp = malloc(m * sizeof(double));
   log_Q = malloc(m*m * sizeof(double));
   if (path == NULL || initp == NULL || log_Q == NULL) {
      fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }

   for (size_t i = 0 ; i < m ; i++) initp[i] = log(1.0/m);
   for (size_t i = 0 ; i < m*m ; i++) log_Q[i] = log(Q[i]);

   block_viterbi(m, ChIP->nb, ChIP->size, log_Q, initp, jahmm->pem, path);

   // The model now owns 'path'.
   jahmm->path = path;
   path = NULL;

   retval = jahmm;

clean_and_return:
   // After a successful call to 'new_jahmm()', 'Q' and 'p' belong
   // to the model and must not be freed here.
   if (jahmm == NULL) {
      free(Q);
      free(p);
   }
   free(ctrl);
   free(Q_);
   free(p_);
   free(phi_);
   free(pem_);
   free(initp);
   free(log_Q);
   free(path);
   // NOTE(review): 'z' is never released, and 'jahmm' is not
   // released when the function fails after 'new_jahmm()'. The
   // proper way to dispose of them is not visible from here.
   // TODO: confirm and free them.
   return retval;

}
Exemplo n.º 3
0
zerone_t *
do_zerone
(
   ChIP_t * ChIP
)
// Fit a 3-state model to 'ChIP' and compute its Viterbi path.
//
// The first profile of 'ChIP' is passed to 'mle_zinb()' to get the
// initial parameters, the model is fitted with 'bw_zinm()', the
// states are sorted by decreasing value of p0 and the path is
// computed with 'block_viterbi()'.
//
// Returns NULL if 'ChIP' has more than 63 profiles, or if the
// initial estimation or the creation of the model fails (a message
// is then printed on stderr). Otherwise returns the model.
{

   // The number of state in Zerone is an important constant.
   // So much depends on it that it may be frozen in the code.
   const unsigned int m = 3;

   int        * mock = NULL; // The mock ChIP profile.
   int        * path = NULL; // The Viterbi path.
   zinb_par_t * par  = NULL; // The parameter estimates.
   zerone_t   * Z    = NULL; // The Zerone instance.

   // Extract the dimensions of the observations.
   const unsigned int r = ChIP->r;
   const unsigned int n = nobs(ChIP);

   // The array 'p' below has room for 3 rows of at most 64 values
   // and each row contains 'r+1' values.
   if (r > 63) {
      fprintf(stderr, "maximum number of profiles exceeded\n");
      goto clean_and_return;
   }

   // Extract the first ChIP profile (the sum of mock controls).
   mock = malloc(n * sizeof(int));
   if (mock == NULL) {
      fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
      goto clean_and_return;
   }

   // Copy data to 'mock'.
   for (size_t i = 0 ; i < n ; i++) {
      mock[i] = ChIP->y[0+i*r];
   }

   par = mle_zinb(mock, n);
   if (par == NULL) {
// TODO: Change parametrization so that failure does not happen. //
      fprintf(stderr, "zerone failure %s:%d\n", __FILE__, __LINE__);
      apologize();
      goto clean_and_return;
   }

   free(mock);
   mock = NULL;

   double Q[9] = {0};
   double p[3*64] = {0};

   // Set initial values of 'Q'.
   for (size_t i = 0 ; i < 3 ; i++) {
   for (size_t j = 0 ; j < 3 ; j++) {
      Q[i+j*3] = (i==j) ? .95 : .025;
   }
   }

   // Set initial values of 'p'. They are not normalized,
   // but the call to 'bw_zinm' will normalize them.
   for (size_t i = 0 ; i < 3 ; i++) {
      p[0+i*(r+1)] = par->p;
      p[1+i*(r+1)] = 1 - par->p;
      for (size_t j = 2 ; j < r+1 ; j++) {
         p[j+i*(r+1)] = i + 0.5;
      }
   }

   Z = new_zerone(3, ChIP);

   if (Z == NULL) {
      // TODO: handle this case properly. //
      fprintf(stderr, "error in function '%s()' %s:%d\n",
            __func__, __FILE__, __LINE__);
      goto clean_and_return;
   }

   set_zerone_par(Z, Q, par->a, par->pi, p);

   // Run the Baum-Welch algorithm.
   bw_zinm(Z);

   // Reorder the states in case they got scrambled.
   // We use the value of p0 as a sort key (high p0
   // means low average signal and vice versa).
   // 'map[0]' is the state with the highest p0 and
   // 'map[2]' the state with the lowest.
   int map[3] = {0,1,2};
   for (int i = 1 ; i < 3 ; i++) {
      if (Z->p[i*(r+1)] > Z->p[map[0]*(r+1)]) map[0] = i;
      if (Z->p[(i-1)*(r+1)] < Z->p[map[2]*(r+1)]) map[2] = i-1;
   }
   // The middle state is the remaining one.
   map[1] = 3-map[0]-map[2];
   debug_print("map: [%d, %d, %d]\n", map[0], map[1], map[2]);

   if (map[0] != 0 || map[1] != 1 || map[2] != 2) reorder(Z, map);

   // Run the Viterbi algorithm.
   double log_Q[9] = {0};
   double initp[3] = {0};

   path = malloc(n * sizeof(int));
   if (path == NULL) {
      fprintf(stderr, "memory error %s:%d\n", __FILE__, __LINE__);
      // NOTE(review): 'Z' is returned without a Viterbi path in
      // this case. TODO: destroy 'Z' and return NULL.
      goto clean_and_return;
   }

   for (size_t i = 0 ; i < 3 ; i++) initp[i] = log(1.0/3);
   for (size_t i = 0 ; i < 9 ; i++) log_Q[i] = log(Z->Q[i]);

   // Find Viterbi path.
   block_viterbi(m, ChIP->nb, ChIP->sz, log_Q, initp, Z->pem, path);

   Z->path = path;

clean_and_return:
   // 'mock' is still allocated only if 'mle_zinb()' failed.
   free(mock);
   // NOTE(review): 'par' is not released because the way it is
   // allocated by 'mle_zinb()' is not visible from here.
   // TODO: confirm and free it.
   return Z;

}
Exemplo n.º 4
0
int main(int argc, char **argv) {

   debug_print("%s (DEBUG)\n", VERSION);

   if (argc == 1) {
      say_usage();
      exit(EXIT_SUCCESS);
   }

   // Input file names (mock and ChIP).
   char *mock_fnames[MAXNARGS+1] = {0};
   char *ChIP_fnames[MAXNARGS+1] = {0};

   int n_mock_files = 0;
   int n_ChIP_files = 0;
   int no_mock_specified = 1;
   int no_ChIP_specified = 1;

   static int list_flag = 0;
   static int minmapq = 20;
   static int window = 300;
   static int mock_flag = 1;
   static double minconf = 0.0;

   // Needed to check 'strtoul()'.
   char *endptr;

   // Parse options.
   debug_print("%s", "arguments:'\n");
   while(1) {
      int option_index = 0;
      static struct option long_options[] = {
         {"chip",        required_argument,          0, '1'},
         {"confidence",  required_argument,          0, 'c'},
         {"help",        no_argument,                0, 'h'},
         {"list-output", no_argument,       &list_flag,  1 },
         {"mock",        required_argument,          0, '0'},
         {"no-mock",     no_argument,       &mock_flag,  0 },
         {"quality",     required_argument,          0, 'q'},
         {"version",     no_argument,                0, 'v'},
         {"window",      required_argument,          0, 'w'},
         {0, 0, 0, 0}
      };

      int c = getopt_long(argc, argv, "0:1:c:hlq:vw:",
            long_options, &option_index);

      // Done parsing named options. //
      if (c == -1) break;

      switch (c) {
      case 0:
         break;

      case '0':
         debug_print("| mock files(s): %s\n", optarg);
         parse_fname(mock_fnames, optarg, &n_mock_files);
         no_mock_specified = 0;
         break;

      case '1':
         debug_print("| ChIP files(s): %s\n", optarg);
         parse_fname(ChIP_fnames, optarg, &n_ChIP_files);
         no_ChIP_specified = 0;
         break;

      case 'h':
         say_usage();
         return EXIT_SUCCESS;

      case 'l':
         list_flag = 1;
         break;

      case 'c':
         // Decode argument with 'strtod()'
         errno = 0;
         endptr = NULL;
         minconf = strtod(optarg, &endptr);
         if (!check_strtoX(optarg, endptr) || minconf < 0 || minconf > 1) {
            fprintf(stderr,
                  "zerone error: confidence must be "
                  "a float between 0 and 1\n");
            say_usage();
            return EXIT_FAILURE;
         }
         debug_print("| minconf: %f\n", minconf);
         break;

      case 'q':
         // Decode argument with 'strtoul()'
         errno = 0;
         endptr = NULL;
         minmapq = strtoul(optarg, &endptr, 10);
         if (!check_strtoX(optarg, endptr) ||
               minmapq < 0 || minmapq > 254) {
            fprintf(stderr,
                  "zerone error: minimum mapping quality must be "
                  "an integer between 0 and 254\n");
            say_usage();
            return EXIT_FAILURE;
         }
         debug_print("| minmapq: %d\n", minmapq);
         break;

      case 'v':
         say_version();
         return EXIT_SUCCESS;

      case 'w':
         window = atoi(optarg);
         if (window <= 0) {
            fprintf(stderr, "zerone error: window must be a "
                  "positive integer\n");
            say_usage();
            return EXIT_FAILURE;
         }
         debug_print("| window: %d\n", window);
         break;

      default:
         // Cannot parse. //
         say_usage();
         return EXIT_FAILURE;

      }

   }

   // Now parse positional arguments (file names).
   while (optind < argc) {
      parse_fname(ChIP_fnames, argv[optind++], &n_ChIP_files);
   }

   debug_print("%s", "done parsing arguments\n");

   // Check options.
   if (no_mock_specified && mock_flag) {
      fprintf(stderr,
         "zerone error: specify a file for mock control experiment\n");
      say_usage();
      return EXIT_FAILURE;
   }
   if (no_ChIP_specified) {
      fprintf(stderr,
         "zerone error: specify a file for ChIP-seq experiment\n");
      say_usage();
      return EXIT_FAILURE;
   }

   // Process input files.
   zerone_parser_args_t args;
   args.window = window;
   args.minmapq = minmapq;

   ChIP_t *ChIP = parse_input_files(mock_fnames, ChIP_fnames, args);

   if (ChIP == NULL) {
      fprintf(stderr, "error while reading input\n");
      exit(EXIT_FAILURE);
   }

   // debug info //
   {
      debug_print("%s", "done reading input files\n");
      debug_print("%s", "ChIP:\n");
      debug_print("| r = %ld (dimension)\n", ChIP->r);
      debug_print("| nb = %d (block number)\n", ChIP->nb);
      for (int j = 0 ; j < ChIP->nb ; j++) {
         debug_print("| block %s (size: %d)\n",
               ChIP->nm + 32*j, ChIP->sz[j]);
      }
      // Sum reads of all blocks.
      size_t *nreads = calloc(ChIP->r, sizeof(size_t));
      if (nreads == NULL) {
         fprintf(stderr, "memory error\n");
         exit(EXIT_FAILURE);
      }
      for (int i = 0 ; i < nobs(ChIP) ; i++) {
         for (int j = 0 ; j < ChIP->r ; j++) {
            nreads[j] += ChIP->y[j + i*ChIP->r];
         }
      }
      debug_print("| aggregated mock: %ld reads\n", nreads[0]);
      for (int j = 0 ; j < ChIP->r-1 ; j++) {
         debug_print("| %s: %ld reads\n", ChIP_fnames[j], nreads[j+1]);
      }
      free(nreads);
   }

   // Do zerone.
   debug_print("%s", "starting zerone\n");
   zerone_t *Z = do_zerone(ChIP);

   if (Z == NULL) {
      fprintf(stderr, "run time error (sorry)\n");
      exit(EXIT_FAILURE);
   }

   // debug info //
   {
      debug_print("%s", "Q:\n");
      debug_print("%.3f %.3f %.3f\n", Z->Q[0], Z->Q[3], Z->Q[6]);
      debug_print("%.3f %.3f %.3f\n", Z->Q[1], Z->Q[4], Z->Q[7]);
      debug_print("%.3f %.3f %.3f\n", Z->Q[2], Z->Q[5], Z->Q[8]);

      debug_print("%s", "p:\n");
      for (int j = 0 ; j < 3 ; j++) {
         int off = 0;
         char debuf[512];
         for (int i = 0 ; i < Z->r+1 ; i++) {
            off += sprintf(debuf + off, "%.3f ", Z->p[i+j*(Z->r+1)]);
            if (off > 499) break;
         }
         debug_print("%s\n", debuf);
      }
   }

   // Quality control.
   double feat[5];
   double QC = zerone_qc(Z, feat);
   fprintf(stdout, "# QC score: %.3f\n", QC);
   fprintf(stdout, "# features: %.3f, %.3f, %.3f, %.3f, %.3f\n",
                           feat[0], feat[1], feat[2], feat[3], feat[4]);
   fprintf(stdout, "# advice: %s discretization.\n",
         QC >= 0 ? "accept" : "reject");

   // List output.
   if (list_flag) {
      int wid = 0;
      int target = 0;
      double best = 0.0;
      for (int i = 0 ; i < ChIP->nb ; i++) {
         char *name = ChIP->nm + 32*i;

         // Do not print the last bin because it may extend
         // beyond the limit of the chromosome.
         for (int j = 0 ; j < ChIP->sz[i]-1 ; j++) {
            // Toggle on target state.
            double conf = Z->phi[2+wid*3];
            if (!target && Z->path[wid] == 2 && conf > minconf) {
               fprintf(stdout, "%s\t%d\t", name, window*j + 1);
               best = conf;
               target = 1;
            }
            // Toggle off target state.
            else if (target) {
               // Update best score.
               if (conf > best) best = conf;
               if (Z->path[wid] != 2 || conf < minconf) {
                  fprintf(stdout, "%d\t%.5f\n", window*(j+1), best);
                  best = 0.0;
                  target = 0;
               }
            }
            wid++;
         }
         // In case the end of the block is a target.
         if (target) {
            fprintf(stdout, "%d\t%.5f\n", window * ChIP->sz[i], best);
            best = 0.0;
            target = 0;
         }
      }
   }

   // Table output.
   else {
      // Use 'offset' to navigate in the ChIP blocks.
      uint64_t offset = 0;
      // In case no mock was provided, skip the column.
      const int skipmock = mock_flag ? 0 : 1;

      for (int i = 0 ; i < ChIP->nb ; i++) {
         char *name = ChIP->nm + 32*i;

         // Do not print the last bin because it may extend
         // beyond the limit of the chromosome.
         for (int j = 0 ; j < ChIP->sz[i]-1 ; j++) {
            // Skip if 'confidence' too low.
            if (Z->phi[2+(offset+j)*3] < minconf) continue;
            fprintf(stdout, "%s\t%d\t%d\t%d", name, window*j + 1,
                    // Block name, window start, end, state.
                    window*(j+1), Z->path[offset+j] == 2 ? 1 : 0);
            for (int k = skipmock ; k < Z->ChIP->r ; k++) {
               fprintf(stdout, "\t%d",
                    // Read numbers of each file.
                    Z->ChIP->y[(offset+j)*Z->ChIP->r+k]);
            }
            fprintf(stdout, "\t%.5f\n",
                    // Confidence score.
                    Z->phi[2+(offset+j)*3]);
         }
         // End of the block. Update 'offset' before
         // local window number is reset to 0.
         offset += Z->ChIP->sz[i];
      }
   }


   destroy_zerone_all(Z); // Also frees ChIP.

   for (int i = 0 ; i < MAXNARGS ; i++) {
      free(mock_fnames[i]);
      free(ChIP_fnames[i]);
   }

   return 0;

}