示例#1
0
/*
 * Takes the command line options and turns them into something we can understand
 */
static opts_t* parse_args(int argc, char *argv[])
{
    if (argc == 1) { usage(stdout); return NULL; }

    const char* optstring = "i:vb:";

    static const struct option lopts[] = {
        { "hash",                       1, 0, 0 },
        { "input",                      1, 0, 'i' },
        { "verbose",                    0, 0, 'v' },
        { "input-fmt",                  1, 0, 0 },
        { NULL, 0, NULL, 0 }
    };

    opts_t* opts = calloc(sizeof(opts_t), 1);
    if (!opts) { perror("cannot allocate option parsing memory"); return NULL; }

    opts->argv_list = stringify_argv(argc+1, argv-1);
    if (opts->argv_list[strlen(opts->argv_list)-1] == ' ') opts->argv_list[strlen(opts->argv_list)-1] = 0;

    // set defaults
    opts->verbose = false;
    opts->hash = DEFAULT_HASH_TYPE;

    int opt;
    int option_index = 0;
    while ((opt = getopt_long(argc, argv, optstring, lopts, &option_index)) != -1) {
        const char *arg;
        switch (opt) {
        case 'i':   opts->input_name = strdup(optarg);
                    break;
        case 'v':   opts->verbose = true;
                    break;
        case 0:     arg = lopts[option_index].name;
                         if (strcmp(arg, "hash") == 0)               opts->hash = decode_hash_name(optarg);
                    else if (strcmp(arg, "input-fmt") == 0)                  opts->input_fmt = strdup(optarg);
                    else {
                        printf("\nUnknown option: %s\n\n", arg); 
                        usage(stdout); free_opts(opts);
                        return NULL;
                    }
                    break;
        default:    printf("Unknown option: '%c'\n", opt);
            /* else fall-through */
        case '?':   usage(stdout); free_opts(opts); return NULL;
        }
    }

    argc -= optind;
    argv += optind;

    if (argc > 0) opts->input_name = strdup(argv[0]);
    optind = 0;

    // some validation and tidying

    // input defaults to stdin
    if (!opts->input_name) opts->input_name = strdup("-");

    // TODO: list valid hash types here
    if (opts->hash == HASH_UNKNOWN) {
        fprintf(stderr, "Unknown has type\n");
        return NULL;
    }

    return opts;
}
示例#2
0
int main(int argc, char **argv) {
    cram_fd *fd;
    bam_file_t *bfd;
    bam_seq_t *bam = NULL;
    char mode[4] = {'w', '\0', '\0', '\0'};
    char *prefix = NULL;
    int decode_md = 0;
    int C;
    int start, end;
    char ref_name[1024] = {0}, *arg_list, *ref_fn = NULL;
    int embed_ref = 0;

    while ((C = getopt(argc, argv, "bu0123456789mp:hr:R:X")) != -1) {
	switch (C) {
	case 'b':
	    mode[1] = 'b';
	    break;

	case 'u':
	    mode[2] = '0';
	    break;

	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	    mode[2] = C;
	    break;

	case 'm':
	    decode_md = 1;
	    break;

	case 'p':
	    prefix = optarg;
	    break;

	case 'h':
	    usage(stdout);
	    return 0;

	case 'r':
	    ref_fn = optarg;
	    break;

	case 'X':
	    embed_ref = 1;
	    break;

	case 'R': {
	    char *cp = strchr(optarg, ':');
	    if (cp) {
		*cp = 0;
		switch (sscanf(cp+1, "%d-%d", &start, &end)) {
		case 1:
		    end = start;
		    break;
		case 2:
		    break;
		default:
		    fprintf(stderr, "Malformed range format\n");
		    return 1;
		}
	    } else {
		start = INT_MIN;
		end   = INT_MAX;
	    }
	    strncpy(ref_name, optarg, 1023);
	    break;
	}

	case '?':
	    fprintf(stderr, "Unrecognised option: -%c\n", optopt);
	    usage(stderr);
	    return 1;
	}
    }

    if (argc - optind != 1 && argc - optind != 2) {
	usage(stderr);
	return 1;
    }

    if (argc - optind == 1) {
	if (NULL == (bfd = bam_open("-", mode))) {
	    fprintf(stderr, "Failed to open SAM/BAM output\n.");
	    return 1;
	}
    } else {
	if (NULL == (bfd = bam_open(argv[optind+1], mode))) {
	    fprintf(stderr, "Failed to open SAM/BAM output\n.");
	    perror(argv[optind+1]);
	    return 1;
	}
    }

    if (NULL == (fd = cram_open(argv[optind], "rb"))) {
	fprintf(stderr, "Error opening CRAM file '%s'.\n", argv[optind]);
	return 1;
    }

    if (*ref_name != 0)
	cram_index_load(fd, argv[optind]);

    if (prefix)
	cram_set_option(fd, CRAM_OPT_PREFIX, prefix);

    if (decode_md)
	cram_set_option(fd, CRAM_OPT_DECODE_MD, decode_md);

    if (embed_ref)
	cram_set_option(fd, CRAM_OPT_EMBED_REF, embed_ref);

    /* Find and load reference */
    cram_load_reference(fd, ref_fn);
    if (!fd->refs && !embed_ref) {
	fprintf(stderr, "Unable to find an appropriate reference.\n"
		"Please specify a valid reference with -r ref.fa option.\n");
	return 1;
    }

    bfd->header = fd->header;

    if (*ref_name != 0) {
	cram_range r;
	int refid = sam_hdr_name2ref(fd->header, ref_name);

	if (refid == -1 && *ref_name != '*') {
	    fprintf(stderr, "Unknown reference name '%s'\n", ref_name);
	    return 1;
	}
	r.refid = refid;
	r.start = start;
	r.end = end;
	cram_set_option(fd, CRAM_OPT_RANGE, &r);
    }

    /* SAM Header */
    if (!(arg_list = stringify_argv(argc, argv)))
	return 1;
    sam_hdr_add_PG(bfd->header, "cram_to_sam",
		   "VN", PACKAGE_VERSION,
		   "CL", arg_list, NULL);
    free(arg_list);

    bam_write_header(bfd);

    while (cram_get_bam_seq(fd, &bam) == 0) {
	bam_put_seq(bfd, bam);
    }

    if (!cram_eof(fd)) {
	fprintf(stderr, "Error while reading file\n");
	return 1;
    }

    cram_close(fd);

    bfd->header = NULL;
    bam_close(bfd);

    free(bam);

    return 0;
}
示例#3
0
int main(int argc, char **argv) {
    cram_fd *out;
    bam_file_t *in;
    bam_seq_t *s = NULL;
    char *out_fn;
    int level = '\0'; // nul terminate string => auto level
    char out_mode[4];
    int c, verbose = 0;
    int s_opt = 0, S_opt = 0, embed_ref = 0;
    char *arg_list, *ref_fn = NULL;

    while ((c = getopt(argc, argv, "u0123456789hvs:S:V:r:X")) != -1) {
	switch (c) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	    level = c;
	    break;
	    
	case 'u':
	    level = '0';
	    break;

	case 'h':
	    usage(stdout);
	    return 0;

	case 'v':
	    verbose++;
	    break;

	case 's':
	    s_opt = atoi(optarg);
	    break;

	case 'S':
	    S_opt = atoi(optarg);
	    break;

	case 'V':
	    cram_set_option(NULL, CRAM_OPT_VERSION, optarg);
	    break;

	case 'r':
	    ref_fn = optarg;
	    break;

	case 'X':
	    embed_ref = 1;
	    break;

	case '?':
	    fprintf(stderr, "Unrecognised option: -%c\n", optopt);
	    usage(stderr);
	    return 1;
	}
    }

    if (argc - optind != 1 && argc - optind != 2) {
	usage(stderr);
	return 1;
    }

    /* opening */
    if (NULL == (in = bam_open(argv[optind], "rb"))) {
	perror(argv[optind]);
	return 1;
    }

    out_fn = argc - optind == 2 ? argv[optind+1] : "-";
    sprintf(out_mode, "wb%c", level);
    if (NULL == (out = cram_open(out_fn, out_mode))) {
	fprintf(stderr, "Error opening CRAM file '%s'.\n", out_fn);
	return 1;
    }

    /* SAM Header */
    if (!(arg_list = stringify_argv(argc, argv)))
	return 1;
    sam_hdr_add_PG(in->header, "sam_to_cram",
		   "VN", PACKAGE_VERSION,
		   "CL", arg_list, NULL);
    free(arg_list);

    /* Find and load reference */
    if (!ref_fn) {
	SAM_hdr_type *ty = sam_hdr_find(in->header, "SQ", NULL, NULL);
	if (ty) {
	    SAM_hdr_tag *tag;

	    if ((tag = sam_hdr_find_key(in->header, ty, "UR", NULL))) {
		ref_fn  = tag->str + 3;
		if (strncmp(ref_fn, "file:", 5) == 0)
		    ref_fn += 5;
	    }
	}
    }

    out->header = in->header;
    if (ref_fn)
	cram_load_reference(out, ref_fn);

    if (!out->refs) {
	fprintf(stderr, "Unable to open reference.\n"
		"Please specify a valid reference with -r ref.fa option.\n");
	return 1;
    }
    refs2id(out->refs, out->header);

    if (-1 == cram_write_SAM_hdr(out, in->header))
	return 1;

    cram_set_option(out, CRAM_OPT_VERBOSITY, verbose);
    if (s_opt)
	cram_set_option(out, CRAM_OPT_SEQS_PER_SLICE, s_opt);

    if (S_opt)
	cram_set_option(out, CRAM_OPT_SLICES_PER_CONTAINER, S_opt);

    if (embed_ref)
	cram_set_option(out, CRAM_OPT_EMBED_REF, embed_ref);

    /* Sequence iterators */
    while (bam_get_seq(in, &s) > 0) {
	if (-1 == cram_put_bam_seq(out, s)) {
	    fprintf(stderr, "Failed in cram_put_bam_seq()\n");
	    return 1;
	}
    }

    bam_close(in);
    out->header = NULL; // freed by bam_close()
    if (-1 == cram_close(out)) {
	fprintf(stderr, "Failed in cram_close()\n");
	return 1;
    }

    if (s)
	free(s);

    return 0;
}
示例#4
0
int main(int argc, char **argv) {
    scram_fd *in, *out;
    bam_seq_t *s;
    char imode[10], *in_f = "", omode[10], *out_f = "";
    int level = '\0'; // nul terminate string => auto level
    int c, verbose = 0;
    int s_opt = 0, S_opt = 0, embed_ref = 0, ignore_md5 = 0, decode_md = 0;
    char *ref_fn = NULL;
    int start, end, multi_seq = -1, no_ref = 0;
    int use_bz2 = 0, use_arith = 0, use_lzma = 0;
    char ref_name[1024] = {0};
    refs_t *refs;
    int nthreads = 1;
    t_pool *p = NULL;
    int max_reads = -1;
    enum quality_binning binning = BINNING_NONE;

    /* Parse command line arguments */
    while ((c = getopt(argc, argv, "u0123456789hvs:S:V:r:xXeI:O:R:!MmjJZt:BN:")) != -1) {
	switch (c) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	    level = c;
	    break;
	    
	case 'u':
	    level = '0';
	    break;

	case 'h':
	    usage(stdout);
	    return 0;

	case 'v':
	    verbose++;
	    break;

	case 's':
	    s_opt = atoi(optarg);
	    break;

	case 'S':
	    S_opt = atoi(optarg);
	    break;

	case 'm':
	    decode_md = 1;
	    break;

	case 'V':
	    if (cram_set_option(NULL, CRAM_OPT_VERSION, optarg))
		return 1;
	    break;

	case 'r':
	    ref_fn = optarg;
	    break;

	case 'X':
	    fprintf(stderr, "-X is deprecated in favour of -e.\n");
	case 'e':
	    embed_ref = 1;
	    break;

	case 'x':
	    no_ref = 1;
	    break;

	case 'I':
	    in_f = parse_format(optarg);
	    break;

	case 'O':
	    out_f = parse_format(optarg);
	    break;

	case 'R': {
	    char *cp = strchr(optarg, ':');
	    if (cp) {
		*cp = 0;
		switch (sscanf(cp+1, "%d-%d", &start, &end)) {
		case 1:
		    end = start;
		    break;
		case 2:
		    break;
		default:
		    fprintf(stderr, "Malformed range format\n");
		    return 1;
		}
	    } else {
		start = INT_MIN;
		end   = INT_MAX;
	    }
	    strncpy(ref_name, optarg, 1023);
	    break;
	}

	case '!':
	    ignore_md5 = 1;
	    break;

	case 'M':
	    multi_seq = 1;
	    break;

	case 'j':
#ifdef HAVE_LIBBZ2
	    use_bz2 = 1;
#else
	    fprintf(stderr, "Warning: bzip2 support is not compiled into this"
		    " version.\nPlease recompile.\n");
#endif
	    break;

	case 'J':
	    use_arith = 1;
	    break;

	case 'Z':
#ifdef HAVE_LIBLZMA
	    use_lzma = 1;
#else
	    fprintf(stderr, "Warning: lzma support is not compiled into this"
		    " version.\nPlease recompile.\n");
#endif
	    break;

	case 't':
	    nthreads = atoi(optarg);
	    if (nthreads < 1) {
		fprintf(stderr, "Number of threads needs to be >= 1\n");
		return 1;
	    }
	    break;

	case 'B':
	    binning = BINNING_ILLUMINA;
	    break;

	case 'N': // For debugging
	    max_reads = atoi(optarg);
	    break;

	case '?':
	    fprintf(stderr, "Unrecognised option: -%c\n", optopt);
	    usage(stderr);
	    return 1;
	}
    }    

    if (argc - optind > 2) {
	fprintf(stderr, "Usage: scramble [input_file [output_file]]\n");
	return 1;
    }
    

    /* Open up input and output files */
    sprintf(imode, "r%s%c", in_f, level);
    if (argc - optind > 0) {
	if (*in_f == 0)
	    sprintf(imode, "r%s%c", detect_format(argv[optind]), level);
	if (!(in = scram_open(argv[optind], imode))) {
	    fprintf(stderr, "Failed to open file %s\n", argv[optind]);
	    return 1;
	}
    } else {
	if (!(in = scram_open("-", imode))) {
	    fprintf(stderr, "Failed to open file %s\n", argv[optind]);
	    return 1;
	}
    }
    if (!in->is_bam && ref_fn) {
	cram_load_reference(in->c, ref_fn);
	if (!in->c->refs && !embed_ref) {
	    fprintf(stderr, "Unable to find an appropriate reference.\n"
		    "Please specify a valid reference with "
		    "-r ref.fa option.\n");
	    return 1;
	}
    }

    sprintf(omode, "w%s%c", out_f, level);
    if (argc - optind > 1) {
	if (*out_f == 0)
	    sprintf(omode, "w%s%c", detect_format(argv[optind+1]), level);
	if (!(out = scram_open(argv[optind+1], omode))) {
	    fprintf(stderr, "Failed to open file %s\n", argv[optind+1]);
	    return 1;
	}
    } else {
	if (!(out = scram_open("-", omode))) {
	    fprintf(stderr, "Failed to open file %s\n", argv[optind+1]);
	    return 1;
	}
    }


    /* Set any format specific options */
    scram_set_refs(out, refs = scram_get_refs(in));

    scram_set_option(out, CRAM_OPT_VERBOSITY, verbose);
    if (s_opt)
	if (scram_set_option(out, CRAM_OPT_SEQS_PER_SLICE, s_opt))
	    return 1;

    if (S_opt)
	if (scram_set_option(out, CRAM_OPT_SLICES_PER_CONTAINER, S_opt))
	    return 1;

    if (embed_ref)
	if (scram_set_option(out, CRAM_OPT_EMBED_REF, embed_ref))
	    return 1;

    if (use_bz2)
	if (scram_set_option(out, CRAM_OPT_USE_BZIP2, use_bz2))
	    return 1;

    if (use_arith)
	if (scram_set_option(out, CRAM_OPT_USE_ARITH, use_arith))
	    return 1;

    if (use_lzma)
	if (scram_set_option(out, CRAM_OPT_USE_LZMA, use_lzma))
	    return 1;

    if (binning != BINNING_NONE)
	if (scram_set_option(out, CRAM_OPT_BINNING, binning))
	    return 1;

    if (no_ref)
	if (scram_set_option(out, CRAM_OPT_NO_REF, no_ref))
	    return 1;

    if (multi_seq)
	if (scram_set_option(out, CRAM_OPT_MULTI_SEQ_PER_SLICE, multi_seq))
	    return 1;

    if (decode_md) {
	if (no_ref) {
	    fprintf(stderr, "Cannot use -m in conjunction with -x.\n");
	    return 1;
	}
	if (scram_set_option(in, CRAM_OPT_DECODE_MD, decode_md))
	    return 1;
    }

    if (nthreads > 1) {
	if (NULL == (p = t_pool_init(nthreads*2, nthreads)))
	    return 1;

	if (scram_set_option(in,  CRAM_OPT_THREAD_POOL, p))
	    return 1;
	if (scram_set_option(out, CRAM_OPT_THREAD_POOL, p))
	    return 1;
    }

    if (ignore_md5)
	if (scram_set_option(in, CRAM_OPT_IGNORE_MD5, ignore_md5))
	    return 1;
    

    /* Copy header and refs from in to out, for writing purposes */
    scram_set_header(out, scram_get_header(in));

    // Needs doing after loading the header.
    if (ref_fn) {
	if (scram_set_option(out, CRAM_OPT_REFERENCE, ref_fn))
	    return 1;
    } else {
	// Attempt to fill out a cram->refs[] array from @SQ headers
	scram_set_option(out, CRAM_OPT_REFERENCE, NULL);
    }

    if (scram_get_header(out)) {
	char *arg_list = stringify_argv(argc, argv);

	if (!arg_list)
	    return 1;

	if (sam_hdr_add_PG(scram_get_header(out), "scramble",
			   "VN", PACKAGE_VERSION,
			   "CL", arg_list, NULL))
	    return 1;

	if (scram_write_header(out))
	    return 1;

	free(arg_list);

    }


    /* Support for sub-range queries, currently implemented for CRAM only */
    if (*ref_name != 0) {
	cram_range r;
	int refid;

	if (in->is_bam) {
	    fprintf(stderr, "Currently the -R option is only implemented for CRAM indices\n");
	    return 1;
	}
	    
	cram_index_load(in->c, argv[optind]);

	refid = sam_hdr_name2ref(in->c->header, ref_name);

	if (refid == -1 && *ref_name != '*') {
	    fprintf(stderr, "Unknown reference name '%s'\n", ref_name);
	    return 1;
	}
	r.refid = refid;
	r.start = start;
	r.end = end;
	if (scram_set_option(in, CRAM_OPT_RANGE, &r))
	    return 1;
    }

    /* Do the actual file format conversion */
    s = NULL;

    while (scram_get_seq(in, &s) >= 0) {
	if (-1 == scram_put_seq(out, s)) {
	    fprintf(stderr, "Failed to encode sequence\n");
	    return 1;
	}
	if (max_reads >= 0)
	    if (--max_reads == 0)
		break;
    }

    if (max_reads == -1) {
	switch(scram_eof(in)) {
	case 0:
	    fprintf(stderr, "Failed to decode sequence\n");
	    return 1;
	case 2:
	    fprintf(stderr, "Warning: no end-of-file block identified. "
		    "File may be truncated.\n");
	    break;
	case 1: default:
	    // expected case
	    break;
	}
    }

    /* Finally tidy up and close files */
    if (scram_close(in))
	return 1;
    if (scram_close(out))
	return 1;

    if (p)
	t_pool_destroy(p, 0);

    if (s)
	free(s);

    return 0;
}
示例#5
0
int main_reheader(int argc, char *argv[])
{
    int inplace = 0, r, add_PG = 1, c;
    bam_hdr_t *h;
    samFile *in;
    char *arg_list = stringify_argv(argc+1, argv-1);

    static const struct option lopts[] = {
        {"help",     no_argument, NULL, 'h'},
        {"in-place", no_argument, NULL, 'i'},
        {"no-PG",    no_argument, NULL, 'P'},
        {NULL, 0, NULL, 0}
    };

    while ((c = getopt_long(argc, argv, "hiP", lopts, NULL)) >= 0) {
        switch (c) {
        case 'P': add_PG = 0; break;
        case 'i': inplace = 1; break;
        case 'h': usage(stdout, 0); break;
        default:
            fprintf(stderr, "Invalid option '%c'\n", c);
            usage(stderr, 1);
        }
    }

    if (argc - optind != 2)
        usage(stderr, 1);

    { // read the header
        samFile *fph = sam_open(argv[optind], "r");
        if (fph == 0) {
            fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[optind]);
            return 1;
        }
        h = sam_hdr_read(fph);
        sam_close(fph);
        if (h == NULL) {
            fprintf(stderr, "[%s] failed to read the header for '%s'.\n",
                    __func__, argv[1]);
            return 1;
        }
    }
    in = sam_open(argv[optind+1], inplace?"r+":"r");
    if (in == 0) {
        fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[optind+1]);
        return 1;
    }
    if (hts_get_format(in)->format == bam) {
        r = bam_reheader(in->fp.bgzf, h, fileno(stdout), arg_list, add_PG);
    } else {
        if (inplace)
            r = cram_reheader_inplace(in->fp.cram, h, arg_list, add_PG);
        else
            r = cram_reheader(in->fp.cram, h, arg_list, add_PG);
    }

    if (sam_close(in) != 0)
        r = -1;

    bam_hdr_destroy(h);

    if (arg_list)
        free(arg_list);

    return -r;
}