示例#1
0
// Parse the command line options of the `build` command.
//
// Options are processed strictly in the order given:
//  * -s starts a new sample: it moves on to the next colour and records the
//    sample name for that colour.
//  * -1/-2/-i add a sequence input task for the current colour, using the
//    loading preferences (-M, -O, -Q, -H, -p, -P) seen so far. A sample name
//    must have been given first.
//  * -g queues an existing graph file to load, -I queues an intersection
//    graph file.
static void parse_args(int argc, char **argv)
{
  // Template task: its preferences are updated by the option loop and the
  // task is handed to add_task() each time a sequence input is seen
  BuildGraphTask task;
  memset(&task, 0, sizeof(task));
  task.prefs = SEQ_LOADING_PREFS_INIT;
  task.stats = SEQ_LOADING_STATS_INIT;
  uint8_t fq_offset = 0;
  // Colour currently being loaded into; -1 until the first -s or -g
  int intocolour = -1;
  GraphFileReader tmp_gfile;

  // Arg parsing
  char cmd[100], shortopts[100];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  int c;
  // sample_named: a -s has been given since the start / since the last -g
  // pref_unused:  a loading preference was given after the last sequence input
  bool sample_named = false, pref_unused = false;

  while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
    // Fetch a printable name for this option into cmd, for error messages
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 't': cmd_check(!nthreads,cmd); nthreads = cmd_uint32_nonzero(cmd, optarg); break;
      case 'm': cmd_mem_args_set_memory(&memargs, optarg); break;
      case 'n': cmd_mem_args_set_nkmers(&memargs, optarg); break;
      case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break;
      case 'k': cmd_check(!kmer_size,cmd); kmer_size = cmd_kmer_size(cmd, optarg); break;
      case 's':
        // New sample: advance to the next colour and remember its name.
        // The name points into argv (optarg), it is not copied here.
        intocolour++;
        check_sample_name(optarg);
        sample_name_buf_add(&snamebuf, (SampleName){.colour = intocolour,
                                                    .name = optarg});
        sample_named = true;
        break;
      case '1':
      case '2':
      case 'i':
        // Sequence input: consumes the preferences accumulated so far
        pref_unused = false;
        if(!sample_named)
          cmd_print_usage("Please give sample name first [-s,--sample <name>]");
        asyncio_task_parse(&task.files, c, optarg, fq_offset, NULL);
        task.prefs.colour = intocolour;
        add_task(&task);
        break;
      case 'M':
        // Mate pair orientation for subsequent paired inputs
             if(!strcmp(optarg,"FF")) task.prefs.matedir = READPAIR_FF;
        else if(!strcmp(optarg,"FR")) task.prefs.matedir = READPAIR_FR;
        else if(!strcmp(optarg,"RF")) task.prefs.matedir = READPAIR_RF;
        else if(!strcmp(optarg,"RR")) task.prefs.matedir = READPAIR_RR;
        else die("-M,--matepair <orient> must be one of: FF,FR,RF,RR");
        pref_unused = true; break;
      case 'O': fq_offset = cmd_uint8(cmd, optarg); pref_unused = true; break;
      case 'Q': task.prefs.fq_cutoff = cmd_uint8(cmd, optarg); pref_unused = true; break;
      case 'H': task.prefs.hp_cutoff = cmd_uint8(cmd, optarg); pref_unused = true; break;
      case 'p': task.prefs.remove_pcr_dups = true; pref_unused = true; break;
      case 'P': task.prefs.remove_pcr_dups = false; pref_unused = true; break;
      case 'g':
        // Graph file to load. If no sample has been started yet, start at
        // colour 0.
        if(intocolour == -1) intocolour = 0;
        graph_file_reset(&tmp_gfile);
        graph_file_open2(&tmp_gfile, optarg, "r", true, intocolour);
        // Keep intocolour at least as large as (into-ncols of the file - 1);
        // presumably the last colour this file loads into -- TODO confirm
        intocolour = MAX2((size_t)intocolour, file_filter_into_ncols(&tmp_gfile.fltr)-1);
        gfile_buf_push(&gfilebuf, &tmp_gfile, 1);
        // A new -s is required before further sequence inputs
        sample_named = false;
        break;
      case 'I':
        // Intersection graph: always flattened into colour 0
        graph_file_reset(&tmp_gfile);
        graph_file_open(&tmp_gfile, optarg);
        if(file_filter_into_ncols(&tmp_gfile.fltr) > 1)
          warn("Flattening intersection graph into colour 0: %s", optarg);
        file_filter_flatten(&tmp_gfile.fltr, 0);
        gfile_buf_push(&gisecbuf, &tmp_gfile, 1);
        break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        // cmd_print_usage(NULL);
        // NOTE(review): no break here -- relies on die() not returning
        die("`"CMD" build -h` for help. Bad option: %s", argv[optind-1]);
      default: die("Bad option: %s", cmd);
    }
  }
示例#2
0
void read_thread_args_parse(struct ReadThreadCmdArgs *args,
                            int argc, char **argv,
                            const struct option *longopts, bool correct_cmd)
{
  size_t i;
  CorrectAlnInput task = CORRECT_ALN_INPUT_INIT;
  uint8_t fq_offset = 0;
  GPathReader tmp_gpfile;

  CorrectAlnInputBuffer *inputs = &args->inputs;
  args->memargs = (struct MemArgs)MEM_ARGS_INIT;
  args->fmt = SEQ_FMT_FASTQ;

  // Arg parsing
  char cmd[100];
  char shortopts[300];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  int used = 1, c;
  char *tmp_path;

  // silence error messages from getopt_long
  // opterr = 0;

  while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break;
      case 'o': cmd_check(!args->out_ctp_path,cmd); args->out_ctp_path = optarg; break;
      case 'p':
        memset(&tmp_gpfile, 0, sizeof(GPathReader));
        gpath_reader_open(&tmp_gpfile, optarg);
        gpfile_buf_push(&args->gpfiles, &tmp_gpfile, 1);
        break;
      case 't':
        cmd_check(!args->nthreads, cmd);
        args->nthreads = cmd_uint32_nonzero(cmd, optarg);
        break;
      case 'm': cmd_mem_args_set_memory(&args->memargs, optarg); break;
      case 'n': cmd_mem_args_set_nkmers(&args->memargs, optarg); break;
      case 'c': args->colour = cmd_uint32(cmd, optarg); break;
      case 'F':
        cmd_check(args->fmt == SEQ_FMT_FASTQ, cmd);
        args->fmt = cmd_parse_format(cmd, optarg);
        break;
      case '1':
      case '2':
      case 'i':
        used = 1;
        correct_aln_input_buf_push(inputs, &task, 1);
        asyncio_task_parse(&inputs->b[inputs->len-1].files, c, optarg,
                           fq_offset, correct_cmd ? &tmp_path : NULL);
        if(correct_cmd) inputs->b[inputs->len-1].out_base = tmp_path;
        break;
      case 'M':
             if(!strcmp(optarg,"FF")) task.matedir = READPAIR_FF;
        else if(!strcmp(optarg,"FR")) task.matedir = READPAIR_FR;
        else if(!strcmp(optarg,"RF")) task.matedir = READPAIR_RF;
        else if(!strcmp(optarg,"RR")) task.matedir = READPAIR_RR;
        else die("-M,--matepair <orient> must be one of: FF,FR,RF,RR");
        used = 0; break;
      case 'O': fq_offset = cmd_uint8(cmd, optarg); used = 0; break;
      case 'Q': task.fq_cutoff = cmd_uint8(cmd, optarg); used = 0; break;
      case 'H': task.hp_cutoff = cmd_uint8(cmd, optarg); used = 0; break;
      case 'l': task.crt_params.frag_len_min = cmd_uint32(cmd, optarg); used = 0; break;
      case 'L': task.crt_params.frag_len_max = cmd_uint32(cmd, optarg); used = 0; break;
      case 'w': task.crt_params.one_way_gap_traverse = true; used = 0; break;
      case 'W': task.crt_params.one_way_gap_traverse = false; used = 0; break;
      case 'd': task.crt_params.gap_wiggle = cmd_udouble(cmd, optarg); used = 0; break;
      case 'D': task.crt_params.gap_variance = cmd_udouble(cmd, optarg); used = 0; break;
      case 'X': task.crt_params.max_context = cmd_uint32(cmd, optarg); used = 0; break;
      case 'e': task.crt_params.use_end_check = true; used = 0; break;
      case 'E': task.crt_params.use_end_check = false; used = 0; break;
      case 'g': cmd_check(!args->dump_seq_sizes, cmd); args->dump_seq_sizes = optarg; break;
      case 'G': cmd_check(!args->dump_frag_sizes, cmd); args->dump_frag_sizes = optarg; break;
      case 'u': args->use_new_paths = true; break;
      case 'x': gen_paths_print_contigs = true; break;
      case 'y': gen_paths_print_paths = true; break;
      case 'z': gen_paths_print_reads = true; break;
      case 'Z':
        cmd_check(!args->fq_zero, cmd);
        if(strlen(optarg) != 1)
          cmd_print_usage("--fq-zero <c> requires a single char");
        args->fq_zero = optarg[0];
        break;
      case 'P': cmd_check(!args->append_orig_seq,cmd); args->append_orig_seq = true; break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        // cmd_print_usage(NULL);
        die("`"CMD" thread/correct -h` for help. Bad option: %s", argv[optind-1]);
      default: abort();
    }
  }

  if(args->nthreads == 0) args->nthreads = DEFAULT_NTHREADS;

  // Check that optind+1 == argc
  if(optind+1 > argc)
    cmd_print_usage("Expected exactly one graph file");
  else if(optind+1 < argc)
    cmd_print_usage("Expected only one graph file. What is this: '%s'", argv[optind]);

  char *graph_path = argv[optind];
  status("Reading graph: %s", graph_path);

  if(!used) cmd_print_usage("Ignored arguments after last --seq");

  // ctx_thread requires output file
  if(!correct_cmd && !args->out_ctp_path)
    cmd_print_usage("--out <out.ctp> is required");

  //
  // Open graph graph file
  //
  GraphFileReader *gfile = &args->gfile;
  graph_file_open(gfile, graph_path);

  if(!correct_cmd && file_filter_into_ncols(&gfile->fltr) > 1)
    die("Please specify a single colour e.g. %s:0", file_filter_path(&gfile->fltr));

  //
  // Open path files
  //
  size_t path_max_usedcols = 0;
  for(i = 0; i < args->gpfiles.len; i++) {
    // file_filter_update_intocol(&args->pfiles.b[i].fltr, 0);
    if(!correct_cmd && file_filter_into_ncols(&args->gpfiles.b[i].fltr) > 1) {
      die("Please specify a single colour e.g. %s:0",
          file_filter_path(&args->gpfiles.b[i].fltr));
    }
    path_max_usedcols = MAX2(path_max_usedcols,
                             file_filter_into_ncols(&args->gpfiles.b[i].fltr));
  }
  args->path_max_usedcols = path_max_usedcols;

  // Check for compatibility between graph files and path files
  graphs_gpaths_compatible(gfile, 1, args->gpfiles.b, args->gpfiles.len, -1);

  // if no paths loaded, set all max_context values to 1, since >1 kmer only
  // useful if can pickup paths
  if(args->gpfiles.len == 0) {
    for(i = 0; i < inputs->len; i++)
      inputs->b[i].crt_params.max_context = 1;
  }

  // Check frag_len_min < frag_len_max
  for(i = 0; i < inputs->len; i++)
  {
    CorrectAlnInput *t = &inputs->b[i];
    t->files.ptr = t;
    if(t->crt_params.frag_len_min > t->crt_params.frag_len_max) {
      die("--min-ins %u is greater than --max-ins %u",
          t->crt_params.frag_len_min, t->crt_params.frag_len_max);
    }
    correct_aln_input_print(&inputs->b[i]);
    args->max_gap_limit = MAX2(args->max_gap_limit, t->crt_params.frag_len_max);
  }

  futil_create_output(args->dump_seq_sizes);
  futil_create_output(args->dump_frag_sizes);
}
示例#3
0
// Parse the command line of the `reads` command.
//
// Every -1/-2/-i option adds one entry to both `inputs` and `files` (kept in
// lock-step). The format (-F) and invert (-v) settings apply to all inputs and
// are copied onto each entry once parsing has finished. Whatever remains on
// the command line after the options is taken as the list of graph files
// (gfile_paths / num_gfiles); at least one sequence input and one graph file
// are required.
static void parse_args(int argc, char **argv)
{
  seq_format seq_fmt = SEQ_FMT_FASTQ;
  bool do_invert = false;

  aln_reads_buf_alloc(&inputs, 8);
  asyncio_buf_alloc(&files, 8);

  char optname[100], optstr[100];
  cmd_long_opts_to_short(longopts, optstr, sizeof(optstr));

  for(;;)
  {
    int opt = getopt_long_only(argc, argv, optstr, longopts, NULL);
    if(opt == -1) break;

    cmd_get_longopt_str(longopts, opt, optname, sizeof(optname));

    switch(opt)
    {
      case 0: /* flag set */
        break;
      case 'h':
        cmd_print_usage(NULL);
        break;
      case 'f':
        cmd_check(!futil_get_force(), optname);
        futil_set_force(true);
        break;
      case 't':
        cmd_check(!nthreads, optname);
        nthreads = cmd_uint32_nonzero(optname, optarg);
        break;
      case 'm':
        cmd_mem_args_set_memory(&memargs, optarg);
        break;
      case 'n':
        cmd_mem_args_set_nkmers(&memargs, optarg);
        break;
      case 'F':
        cmd_check(seq_fmt == SEQ_FMT_FASTQ, optname);
        seq_fmt = cmd_parse_format(optname, optarg);
        break;
      case 'v':
        cmd_check(!do_invert, optname);
        do_invert = true;
        break;
      case '1':
      case '2':
      case 'i':
      {
        // Start from zeroed records; the parser fills in the file handles and
        // the output base name
        AlignReadsData rdata;
        AsyncIOInput io;
        memset(&rdata, 0, sizeof(rdata));
        memset(&io, 0, sizeof(io));
        asyncio_task_parse(&io, opt, optarg, 0, &rdata.out_base);
        aln_reads_buf_push(&inputs, &rdata, 1);
        asyncio_buf_push(&files, &io, 1);
        break;
      }
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        die("`"CMD" reads -h` for help. Bad option: %s", argv[optind-1]);
      default: abort();
    }
  }

  ctx_assert(inputs.len == files.len);

  // Fall back to the default thread count if -t was not given
  if(nthreads == 0) nthreads = DEFAULT_NTHREADS;

  if(inputs.len == 0)
    cmd_print_usage("Please specify at least one sequence file (-1, -2 or -i)");

  if(argc <= optind)
    cmd_print_usage("Please specify input graph file(s)");

  // Remaining arguments are the graph files
  gfile_paths = argv + optind;
  num_gfiles = (size_t)(argc - optind);

  // Apply the shared settings and link each file record to its input. This is
  // done only now, after the last push, when the addresses of the inputs.b
  // entries are final.
  for(size_t k = 0; k < inputs.len; k++) {
    inputs.b[k].fmt = seq_fmt;
    inputs.b[k].invert = do_invert;
    files.b[k].ptr = &inputs.b[k];
  }
}
示例#4
0
/**
 * Parse the command line of the thread / correct commands into `args`.
 *
 * Option order matters. Per-input settings (-f, -F, -r, -R, -w, -W, -q, -Q,
 * -H, -e, -E, -g, -G) are accumulated in a template task; each sequence input
 * (-1, -2, -i) adds a copy of the template as it stands at that point.
 * Per-input settings given after the last sequence input are reported as a
 * usage error.
 *
 * After the options exactly one positional argument, the graph file path, is
 * expected. The graph file is opened into args->gfile, the path files given
 * with -p are checked against it, and every input is validated and printed.
 *
 * @param args        Filled in by this function. Several fields
 *                    (num_of_threads, out_ctp_path, clean_threshold,
 *                    max_gap_limit, ...) are read before being written, so the
 *                    caller must have initialised the struct.
 * @param argc        Argument count, as passed to getopt_long_only()
 * @param argv        Argument vector, as passed to getopt_long_only()
 * @param longopts    Long option table; short options are derived from it
 * @param correct_cmd true when parsing for the correct command: an output
 *                    base name is taken from each sequence input and
 *                    multi-colour graph / path files are accepted
 */
void read_thread_args_parse(struct ReadThreadCmdArgs *args,
                            int argc, char **argv,
                            const struct option *longopts, bool correct_cmd)
{
  size_t i;
  int tmp_thresh; // 0 => no calling, -1 => auto
  // Template of per-input settings, copied for each sequence input
  CorrectAlnInput task = CORRECT_ALN_INPUT_INIT;
  uint8_t fq_offset = 0;
  size_t dump_seq_n = 0, dump_mp_n = 0; // how many times are -S -M specified
  PathFileReader tmp_pfile;

  CorrectAlnInputBuffer *inputs = &args->inputs;

  // Arg parsing
  char cmd[100];
  char shortopts[300];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  // used == 0 while per-input settings are waiting for a sequence input
  int used = 1, c;
  char *tmp_path;

  // getopt_long prints its own error messages (opterr is left untouched)

  while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
    // Fetch a printable name for this option into cmd, for error messages
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 'o':
        // Output path may only be given once
        if(args->out_ctp_path != NULL) cmd_print_usage(NULL);
        args->out_ctp_path = optarg;
        break;
      case 'p':
        tmp_pfile = INIT_PATH_READER;
        path_file_open(&tmp_pfile, optarg, true);
        pfile_buf_add(&args->pfiles, tmp_pfile);
        break;
      case 't':
        if(args->num_of_threads != 0) die("%s set twice", cmd);
        args->num_of_threads = cmd_parse_arg_uint32_nonzero(cmd, optarg);
        break;
      case 'm': cmd_mem_args_set_memory(&args->memargs, optarg); break;
      case 'n': cmd_mem_args_set_nkmers(&args->memargs, optarg); break;
      case 'c': args->colour = cmd_parse_arg_uint32(cmd, optarg); break;
      case '1':
      case '2':
      case 'i':
        // Sequence input: add a copy of the template, then parse the file
        // path(s) into the newly added entry
        used = 1;
        correct_aln_input_buf_add(inputs, task);
        asyncio_task_parse(&inputs->data[inputs->len-1].files, c, optarg,
                           fq_offset, correct_cmd ? &tmp_path : NULL);
        if(correct_cmd) inputs->data[inputs->len-1].out_base = tmp_path;
        break;
      // Per-input settings: apply to the sequence inputs that follow
      case 'f': task.matedir = READPAIR_FR; used = 0; break;
      case 'F': task.matedir = READPAIR_FF; used = 0; break;
      case 'r': task.matedir = READPAIR_RF; used = 0; break;
      case 'R': task.matedir = READPAIR_RR; used = 0; break;
      case 'w': task.crt_params.one_way_gap_traverse = true; used = 0; break;
      case 'W': task.crt_params.one_way_gap_traverse = false; used = 0; break;
      case 'q': fq_offset = cmd_parse_arg_uint8(cmd, optarg); used = 0; break;
      case 'Q': task.fq_cutoff = cmd_parse_arg_uint8(cmd, optarg); used = 0; break;
      case 'H': task.hp_cutoff = cmd_parse_arg_uint8(cmd, optarg); used = 0; break;
      case 'e': task.crt_params.use_end_check = true; used = 0; break;
      case 'E': task.crt_params.use_end_check = false; used = 0; break;
      case 'g': task.crt_params.ins_gap_min = cmd_parse_arg_uint32(cmd, optarg); used = 0; break;
      case 'G': task.crt_params.ins_gap_max = cmd_parse_arg_uint32(cmd, optarg); used = 0; break;
      // Repeats of -S / -M are counted here and rejected after the loop
      case 'S': args->dump_seq_sizes = optarg; dump_seq_n++; break;
      case 'M': args->dump_mp_sizes = optarg; dump_mp_n++; break;
      case 'u': args->use_new_paths = true; break;
      case 'C':
        // No argument or the literal "auto" selects the automatic threshold
        // (-1). strcmp() returns 0 on a match, so it must be compared to 0.
        if(optarg == NULL || strcmp(optarg,"auto") == 0) args->clean_threshold = -1;
        else if(parse_entire_int(optarg,&tmp_thresh) && tmp_thresh >= -1) {
          // Out-of-range values are ignored with a warning, leaving
          // clean_threshold as it was
          if(tmp_thresh != -1 && tmp_thresh < 2)
            warn("Ignoring --clean %i (too small < 2)", tmp_thresh);
          else if(tmp_thresh > 255)
            warn("Ignoring --clean %i (too big > 255)", tmp_thresh);
          else
            args->clean_threshold = tmp_thresh;
        }
        else die("Bad argument for %s <auto|N> where N > 1", cmd);
        args->clean_paths = (args->clean_threshold != 0);
        break;
      case 'X': gen_paths_print_contigs = true; break;
      case 'Y': gen_paths_print_paths = true; break;
      case 'Z': gen_paths_print_reads = true; break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        // NOTE(review): no break here -- relies on die() not returning
        die("`"CMD" thread -h` for help. Bad option: %s", argv[optind-1]);
      default: abort();
    }
  }

  if(args->num_of_threads == 0) args->num_of_threads = DEFAULT_NTHREADS;

  // Check that optind+1 == argc, i.e. exactly one positional argument remains
  if(optind+1 > argc)
    cmd_print_usage("Expected exactly one graph file");
  else if(optind+1 < argc)
    cmd_print_usage("Expected only one graph file. What is this: '%s'", argv[optind]);

  char *graph_path = argv[optind];
  status("Reading graph: %s", graph_path);

  if(!used) cmd_print_usage("Ignored arguments after last --seq");

  if(dump_seq_n > 1) die("Cannot specify --seq-gaps <out> more than once");
  if(dump_mp_n > 1) die("Cannot specify --mp-gaps <out> more than once");

  //
  // Open graph file
  //
  GraphFileReader *gfile = &args->gfile;
  graph_file_open(gfile, graph_path, true);
  file_filter_update_intocol(&gfile->fltr, 0);
  if(!correct_cmd && graph_file_usedcols(gfile) > 1)
    die("Please specify a single colour e.g. %s:0", gfile->fltr.file_path.buff);

  //
  // Check path files: enforce a single colour unless correct_cmd, and record
  // the largest number of used colours in any path file
  //
  size_t path_max_usedcols = 0;
  for(i = 0; i < args->pfiles.len; i++) {
    if(!correct_cmd && path_file_usedcols(&args->pfiles.data[i]) > 1) {
      die("Please specify a single colour e.g. %s:0",
          args->pfiles.data[i].fltr.file_path.buff);
    }
    path_max_usedcols = MAX2(path_max_usedcols,
                             path_file_usedcols(&args->pfiles.data[i]));
  }
  args->path_max_usedcols = path_max_usedcols;

  // Check for compatibility between graph files and path files
  graphs_paths_compatible(gfile, 1, args->pfiles.data, args->pfiles.len);

  // Finalise each input: link files back to the input, check
  // ins_gap_min <= ins_gap_max, print it, and track the largest ins_gap_max
  for(i = 0; i < inputs->len; i++)
  {
    CorrectAlnInput *t = &inputs->data[i];
    t->files.ptr = t;
    if(t->crt_params.ins_gap_min > t->crt_params.ins_gap_max) {
      die("--min-ins %u is greater than --max-ins %u",
          t->crt_params.ins_gap_min, t->crt_params.ins_gap_max);
    }
    correct_aln_input_print(t);
    args->max_gap_limit = MAX2(args->max_gap_limit, t->crt_params.ins_gap_max);
  }
}