Beispiel #1
0
// Largest value accepted for an integer option argument. It is kept one
// below 2^31-1 so that the saturated value strtol() returns on overflow is
// rejected as well, even on platforms where 'long' is only 32 bits wide.
enum { SEEQ_OPT_INT_MAX = 2147483646 };

// Reports a command-line error using the layout shared by every error path
// of main(): version banner, 'msg' on stderr, then the help hint.
// Always returns EXIT_FAILURE so callers can 'return seeq_usage_error(...)'.
static int
seeq_usage_error
(
   const char *msg
)
{
   say_version();
   fprintf(stderr, "%s", msg);
   say_help();
   return EXIT_FAILURE;
}

// Parses 'text' as a base-10 integer in the range [0, max].
// On success stores the value in '*out' and returns 0. Returns -1 and leaves
// '*out' untouched if 'text' is empty, has trailing characters, is negative
// or exceeds 'max'.
static int
seeq_parse_int_arg
(
   const char *text,
   long        max,
   int        *out
)
{
   char *end = NULL;
   const long value = strtol(text, &end, 10);

   if (end == text || *end != '\0') return -1;
   if (value < 0 || value > max) return -1;

   *out = (int) value;
   return 0;
}

// Command-line entry point.
//
// Parses the options, checks that each of them is given at most once, reads
// the mandatory pattern argument and the optional input file argument, fills
// a 'struct seeqarg_t' and hands everything over to seeq().
//
// Returns EXIT_SUCCESS after printing the version or usage text,
// EXIT_FAILURE on any command-line error, and otherwise the value returned
// by seeq().
int
main
(
   int argc,
   char **argv
)
{
   // Backtrace handler
   signal(SIGSEGV, SIGSEGV_handler);

   // Every flag starts unset (-1). This is what allows detecting an option
   // that is given twice, and applying defaults only to untouched options.
   int showdist_flag  = -1;
   int showpos_flag   = -1;
   int printline_flag = -1;
   int matchonly_flag = -1;
   int showline_flag  = -1;
   int count_flag     = -1;
   int invert_flag    = -1;
   int compact_flag   = -1;
   int dist_flag      = -1;
   int verbose_flag   = -1;
   int endline_flag   = -1;
   int prefix_flag    = -1;
   int best_flag      = -1;
   int nondna_flag    = -1;
   int memory_flag    = -1;
   int all_flag       = -1;

   if (argc == 1) {
      say_version();
      say_usage();
      return EXIT_SUCCESS;
   }

   static const struct option long_options[] = {
      {"positions",     no_argument, 0, 'p'},
      {"match-only",    no_argument, 0, 'm'},
      {"no-printline",  no_argument, 0, 'n'},
      {"print-dist",    no_argument, 0, 'k'},
      {"nondna",  required_argument, 0, 'x'},
      {"lines",         no_argument, 0, 'l'},
      {"count",         no_argument, 0, 'c'},
      {"invert",        no_argument, 0, 'i'},
      {"format-compact",no_argument, 0, 'f'},
      {"verbose",       no_argument, 0, 'z'},
      {"version",       no_argument, 0, 'v'},
      {"help",          no_argument, 0, 'h'},
      {"end",           no_argument, 0, 'e'},
      {"prefix",        no_argument, 0, 'r'},
      {"best",          no_argument, 0, 'b'},
      {"all",           no_argument, 0, 'a'},
      {"memory",  required_argument, 0, 'y'},
      {"distance",required_argument, 0, 'd'},
      {0, 0, 0, 0}
   };

   // Options without argument: short option, flag it controls, value stored
   // when the option is seen, and message printed when it is repeated.
   const struct {
      int         opt;
      int        *flag;
      int         value;
      const char *dup_msg;
   } toggles[] = {
      {'z', &verbose_flag,   1, "error: verbose option set more than once.\n"},
      {'r', &prefix_flag,    1, "error: 'prefix' option set more than once.\n"},
      {'a', &all_flag,       1, "error: 'all' option set more than once.\n"},
      {'b', &best_flag,      1, "error: 'best' option set more than once.\n"},
      {'e', &endline_flag,   1, "error: line-end option set more than once.\n"},
      {'p', &showpos_flag,   1, "error: show-position option set more than once.\n"},
      {'m', &matchonly_flag, 1, "error: match-only option set more than once.\n"},
      // '-n' is the only toggle that switches something off.
      {'n', &printline_flag, 0, "error: no-printline option set more than once.\n"},
      {'k', &showdist_flag,  1, "error: show-distance option set more than once.\n"},
      {'l', &showline_flag,  1, "error: show-line option set more than once.\n"},
      {'c', &count_flag,     1, "error: count option set more than once.\n"},
      {'i', &invert_flag,    1, "error: invert option set more than once.\n"},
      {'f', &compact_flag,   1, "error: format-compact option set more than once.\n"},
   };
   const size_t n_toggles = sizeof toggles / sizeof toggles[0];

   int c;
   while ((c = getopt_long(argc, argv, "apmnilczfvkherby:d:x:",
               long_options, NULL)) != -1) {

      switch (c) {
      case 'd':
         if (dist_flag >= 0) {
            return seeq_usage_error(
                  "error: distance option set more than once.\n");
         }
         if (seeq_parse_int_arg(optarg, SEEQ_OPT_INT_MAX, &dist_flag) < 0) {
            return seeq_usage_error(
                  "error: distance must be a positive integer.\n");
         }
         break;

      case 'y':
         if (memory_flag >= 0) {
            return seeq_usage_error(
                  "error: memory option set more than once.\n");
         }
         if (seeq_parse_int_arg(optarg, SEEQ_OPT_INT_MAX, &memory_flag) < 0) {
            return seeq_usage_error(
                  "error: memory limit must be a positive integer.\n");
         }
         break;

      case 'x':
         if (nondna_flag >= 0) {
            return seeq_usage_error(
                  "error: 'nondna' option set more than once.\n");
         }
         if (seeq_parse_int_arg(optarg, 2, &nondna_flag) < 0) {
            return seeq_usage_error(
                  "error: nondna value must be either 0, 1 or 2.\n");
         }
         break;

      case 'v':
         say_version();
         return EXIT_SUCCESS;

      case 'h':
         say_version();
         say_usage();
         return EXIT_SUCCESS;

      case '?':
         // Unknown option or missing option argument. getopt_long() reports
         // the offending option itself (unless 'opterr' has been cleared),
         // so only the banner and the help hint are added here.
         say_version();
         say_help();
         return EXIT_FAILURE;

      default: {
         size_t k = 0;
         while (k < n_toggles && toggles[k].opt != c) k++;
         // Every character of the option string is handled above or listed
         // in 'toggles'; anything else is ignored.
         if (k == n_toggles) break;
         if (*toggles[k].flag >= 0) {
            return seeq_usage_error(toggles[k].dup_msg);
         }
         *toggles[k].flag = toggles[k].value;
         break;
      }
      }
   }

   // The first positional argument is the pattern and it is mandatory.
   if (optind == argc) {
      return seeq_usage_error("error: not enough arguments.\n");
   }
   char *expr = argv[optind++];

   // At most one more positional argument (the input) is accepted.
   // 'input' stays NULL when it is not given.
   char *input = NULL;
   if (optind < argc) {
      if (optind != argc - 1) {
         return seeq_usage_error("error: too many options.\n");
      }
      input = argv[optind];
   }

   // Options that were not given default to 0...
   int *zero_default[] = {
      &count_flag, &showdist_flag, &showpos_flag, &matchonly_flag,
      &showline_flag, &invert_flag, &compact_flag, &dist_flag,
      &verbose_flag, &endline_flag, &prefix_flag, &best_flag,
      &nondna_flag, &memory_flag, &all_flag,
   };
   for (size_t k = 0; k < sizeof zero_default / sizeof zero_default[0]; k++) {
      if (*zero_default[k] < 0) *zero_default[k] = 0;
   }
   // ... except line printing, which is on by default unless one of the
   // partial-line outputs (match-only, end, prefix) was requested.
   if (printline_flag < 0) {
      printline_flag = (!matchonly_flag && !endline_flag && !prefix_flag);
   }

   if (!showdist_flag && !showpos_flag && !printline_flag && !matchonly_flag
         && !showline_flag && !count_flag && !compact_flag && !prefix_flag
         && !endline_flag) {
      return seeq_usage_error(
            "Invalid options: No output will be generated.\n");
   }

   // '--count' switches off every other output; '--invert' additionally
   // switches off the outputs that are multiplied by 'maskinv' below.
   int maskcnt = !count_flag;
   int maskinv = !invert_flag * maskcnt;

   struct seeqarg_t args;

   args.showdist  = showdist_flag * maskinv;
   args.showpos   = showpos_flag * maskinv;
   args.showline  = showline_flag * maskcnt;
   args.printline = printline_flag * maskinv;
   args.matchonly = matchonly_flag * maskinv;
   args.count     = count_flag;
   args.compact   = compact_flag * maskinv;
   args.dist      = dist_flag;
   args.verbose   = verbose_flag;
   args.endline   = endline_flag * maskinv;
   args.prefix    = prefix_flag * maskinv;
   args.invert    = invert_flag * maskcnt;
   args.best      = best_flag * maskinv;
   args.non_dna   = nondna_flag;
   args.all       = all_flag;
   // The command-line value is multiplied by 2^20 before being passed on.
   args.memory    = (size_t)memory_flag * 1024*1024;
   return seeq(expr, input, args);
}
Beispiel #2
0
int main(int argc, char **argv) {

   debug_print("%s (DEBUG)\n", VERSION);

   if (argc == 1) {
      say_usage();
      exit(EXIT_SUCCESS);
   }

   // Input file names (mock and ChIP).
   char *mock_fnames[MAXNARGS+1] = {0};
   char *ChIP_fnames[MAXNARGS+1] = {0};

   int n_mock_files = 0;
   int n_ChIP_files = 0;
   int no_mock_specified = 1;
   int no_ChIP_specified = 1;

   static int list_flag = 0;
   static int minmapq = 20;
   static int window = 300;
   static int mock_flag = 1;
   static double minconf = 0.0;

   // Needed to check 'strtoul()'.
   char *endptr;

   // Parse options.
   debug_print("%s", "arguments:'\n");
   while(1) {
      int option_index = 0;
      static struct option long_options[] = {
         {"chip",        required_argument,          0, '1'},
         {"confidence",  required_argument,          0, 'c'},
         {"help",        no_argument,                0, 'h'},
         {"list-output", no_argument,       &list_flag,  1 },
         {"mock",        required_argument,          0, '0'},
         {"no-mock",     no_argument,       &mock_flag,  0 },
         {"quality",     required_argument,          0, 'q'},
         {"version",     no_argument,                0, 'v'},
         {"window",      required_argument,          0, 'w'},
         {0, 0, 0, 0}
      };

      int c = getopt_long(argc, argv, "0:1:c:hlq:vw:",
            long_options, &option_index);

      // Done parsing named options. //
      if (c == -1) break;

      switch (c) {
      case 0:
         break;

      case '0':
         debug_print("| mock files(s): %s\n", optarg);
         parse_fname(mock_fnames, optarg, &n_mock_files);
         no_mock_specified = 0;
         break;

      case '1':
         debug_print("| ChIP files(s): %s\n", optarg);
         parse_fname(ChIP_fnames, optarg, &n_ChIP_files);
         no_ChIP_specified = 0;
         break;

      case 'h':
         say_usage();
         return EXIT_SUCCESS;

      case 'l':
         list_flag = 1;
         break;

      case 'c':
         // Decode argument with 'strtod()'
         errno = 0;
         endptr = NULL;
         minconf = strtod(optarg, &endptr);
         if (!check_strtoX(optarg, endptr) || minconf < 0 || minconf > 1) {
            fprintf(stderr,
                  "zerone error: confidence must be "
                  "a float between 0 and 1\n");
            say_usage();
            return EXIT_FAILURE;
         }
         debug_print("| minconf: %f\n", minconf);
         break;

      case 'q':
         // Decode argument with 'strtoul()'
         errno = 0;
         endptr = NULL;
         minmapq = strtoul(optarg, &endptr, 10);
         if (!check_strtoX(optarg, endptr) ||
               minmapq < 0 || minmapq > 254) {
            fprintf(stderr,
                  "zerone error: minimum mapping quality must be "
                  "an integer between 0 and 254\n");
            say_usage();
            return EXIT_FAILURE;
         }
         debug_print("| minmapq: %d\n", minmapq);
         break;

      case 'v':
         say_version();
         return EXIT_SUCCESS;

      case 'w':
         window = atoi(optarg);
         if (window <= 0) {
            fprintf(stderr, "zerone error: window must be a "
                  "positive integer\n");
            say_usage();
            return EXIT_FAILURE;
         }
         debug_print("| window: %d\n", window);
         break;

      default:
         // Cannot parse. //
         say_usage();
         return EXIT_FAILURE;

      }

   }

   // Now parse positional arguments (file names).
   while (optind < argc) {
      parse_fname(ChIP_fnames, argv[optind++], &n_ChIP_files);
   }

   debug_print("%s", "done parsing arguments\n");

   // Check options.
   if (no_mock_specified && mock_flag) {
      fprintf(stderr,
         "zerone error: specify a file for mock control experiment\n");
      say_usage();
      return EXIT_FAILURE;
   }
   if (no_ChIP_specified) {
      fprintf(stderr,
         "zerone error: specify a file for ChIP-seq experiment\n");
      say_usage();
      return EXIT_FAILURE;
   }

   // Process input files.
   zerone_parser_args_t args;
   args.window = window;
   args.minmapq = minmapq;

   ChIP_t *ChIP = parse_input_files(mock_fnames, ChIP_fnames, args);

   if (ChIP == NULL) {
      fprintf(stderr, "error while reading input\n");
      exit(EXIT_FAILURE);
   }

   // debug info //
   {
      debug_print("%s", "done reading input files\n");
      debug_print("%s", "ChIP:\n");
      debug_print("| r = %ld (dimension)\n", ChIP->r);
      debug_print("| nb = %d (block number)\n", ChIP->nb);
      for (int j = 0 ; j < ChIP->nb ; j++) {
         debug_print("| block %s (size: %d)\n",
               ChIP->nm + 32*j, ChIP->sz[j]);
      }
      // Sum reads of all blocks.
      size_t *nreads = calloc(ChIP->r, sizeof(size_t));
      if (nreads == NULL) {
         fprintf(stderr, "memory error\n");
         exit(EXIT_FAILURE);
      }
      for (int i = 0 ; i < nobs(ChIP) ; i++) {
         for (int j = 0 ; j < ChIP->r ; j++) {
            nreads[j] += ChIP->y[j + i*ChIP->r];
         }
      }
      debug_print("| aggregated mock: %ld reads\n", nreads[0]);
      for (int j = 0 ; j < ChIP->r-1 ; j++) {
         debug_print("| %s: %ld reads\n", ChIP_fnames[j], nreads[j+1]);
      }
      free(nreads);
   }

   // Do zerone.
   debug_print("%s", "starting zerone\n");
   zerone_t *Z = do_zerone(ChIP);

   if (Z == NULL) {
      fprintf(stderr, "run time error (sorry)\n");
      exit(EXIT_FAILURE);
   }

   // debug info //
   {
      debug_print("%s", "Q:\n");
      debug_print("%.3f %.3f %.3f\n", Z->Q[0], Z->Q[3], Z->Q[6]);
      debug_print("%.3f %.3f %.3f\n", Z->Q[1], Z->Q[4], Z->Q[7]);
      debug_print("%.3f %.3f %.3f\n", Z->Q[2], Z->Q[5], Z->Q[8]);

      debug_print("%s", "p:\n");
      for (int j = 0 ; j < 3 ; j++) {
         int off = 0;
         char debuf[512];
         for (int i = 0 ; i < Z->r+1 ; i++) {
            off += sprintf(debuf + off, "%.3f ", Z->p[i+j*(Z->r+1)]);
            if (off > 499) break;
         }
         debug_print("%s\n", debuf);
      }
   }

   // Quality control.
   double feat[5];
   double QC = zerone_qc(Z, feat);
   fprintf(stdout, "# QC score: %.3f\n", QC);
   fprintf(stdout, "# features: %.3f, %.3f, %.3f, %.3f, %.3f\n",
                           feat[0], feat[1], feat[2], feat[3], feat[4]);
   fprintf(stdout, "# advice: %s discretization.\n",
         QC >= 0 ? "accept" : "reject");

   // List output.
   if (list_flag) {
      int wid = 0;
      int target = 0;
      double best = 0.0;
      for (int i = 0 ; i < ChIP->nb ; i++) {
         char *name = ChIP->nm + 32*i;

         // Do not print the last bin because it may extend
         // beyond the limit of the chromosome.
         for (int j = 0 ; j < ChIP->sz[i]-1 ; j++) {
            // Toggle on target state.
            double conf = Z->phi[2+wid*3];
            if (!target && Z->path[wid] == 2 && conf > minconf) {
               fprintf(stdout, "%s\t%d\t", name, window*j + 1);
               best = conf;
               target = 1;
            }
            // Toggle off target state.
            else if (target) {
               // Update best score.
               if (conf > best) best = conf;
               if (Z->path[wid] != 2 || conf < minconf) {
                  fprintf(stdout, "%d\t%.5f\n", window*(j+1), best);
                  best = 0.0;
                  target = 0;
               }
            }
            wid++;
         }
         // In case the end of the block is a target.
         if (target) {
            fprintf(stdout, "%d\t%.5f\n", window * ChIP->sz[i], best);
            best = 0.0;
            target = 0;
         }
      }
   }

   // Table output.
   else {
      // Use 'offset' to navigate in the ChIP blocks.
      uint64_t offset = 0;
      // In case no mock was provided, skip the column.
      const int skipmock = mock_flag ? 0 : 1;

      for (int i = 0 ; i < ChIP->nb ; i++) {
         char *name = ChIP->nm + 32*i;

         // Do not print the last bin because it may extend
         // beyond the limit of the chromosome.
         for (int j = 0 ; j < ChIP->sz[i]-1 ; j++) {
            // Skip if 'confidence' too low.
            if (Z->phi[2+(offset+j)*3] < minconf) continue;
            fprintf(stdout, "%s\t%d\t%d\t%d", name, window*j + 1,
                    // Block name, window start, end, state.
                    window*(j+1), Z->path[offset+j] == 2 ? 1 : 0);
            for (int k = skipmock ; k < Z->ChIP->r ; k++) {
               fprintf(stdout, "\t%d",
                    // Read numbers of each file.
                    Z->ChIP->y[(offset+j)*Z->ChIP->r+k]);
            }
            fprintf(stdout, "\t%.5f\n",
                    // Confidence score.
                    Z->phi[2+(offset+j)*3]);
         }
         // End of the block. Update 'offset' before
         // local window number is reset to 0.
         offset += Z->ChIP->sz[i];
      }
   }


   destroy_zerone_all(Z); // Also frees ChIP.

   for (int i = 0 ; i < MAXNARGS ; i++) {
      free(mock_fnames[i]);
      free(ChIP_fnames[i]);
   }

   return 0;

}