Пример #1
0
/*
 * Reads a CRAM file and writes a new CRAM file to stdout with 'h'
 * substituted as the header.  No checks are made on the validity of 'h'.
 *
 * in        CRAM stream whose containers and blocks are copied verbatim.
 * h         Replacement header.  When add_PG is non-zero its text and
 *           l_text are replaced by the header text including the new
 *           @PG line.
 * arg_list  Command line recorded as the @PG "CL" tag; may be NULL, in
 *           which case no "CL" tag is added.
 * add_PG    Non-zero to append a "samtools" @PG line to the header.
 *
 * Returns 0 on success, -1 on failure.
 */
int cram_reheader(cram_fd *in, bam_hdr_t *h, const char *arg_list, int add_PG)
{
    htsFile *h_out = hts_open("-", "wc");
    cram_fd *out;
    cram_container *c = NULL;
    int ret = -1;

    // Nothing to clean up yet if stdout could not be opened.
    if (!h_out)
        return -1;
    out = h_out->fp.cram;

    // Attempt to fill out a cram->refs[] array from @SQ headers
    cram_fd_set_header(out, sam_hdr_parse_(h->text, h->l_text));
    if (!cram_fd_get_header(out))
        goto err;

    if (add_PG) {
        char *new_text;

        if (sam_hdr_add_PG(cram_fd_get_header(out), "samtools",
                           "VN", samtools_version(),
                           arg_list ? "CL": NULL,
                           arg_list ? arg_list : NULL,
                           NULL) != 0)
            goto err;

        // Convert back to bam_hdr_t struct.  Duplicate first so that 'h'
        // is left untouched if the allocation fails.
        new_text = strdup(sam_hdr_str(cram_fd_get_header(out)));
        if (!new_text)
            goto err;
        free(h->text);
        h->text = new_text;
        h->l_text = sam_hdr_length(cram_fd_get_header(out));
    }

    if (sam_hdr_write(h_out, h) != 0)
        goto err;
    cram_set_option(out, CRAM_OPT_REFERENCE, NULL);

    // Copy every container header followed by each of its blocks.
    while ((c = cram_read_container(in))) {
        int32_t i, num_blocks = cram_container_get_num_blocks(c);
        if (cram_write_container(out, c) != 0)
            goto err;

        for (i = 0; i < num_blocks; i++) {
            cram_block *blk = cram_read_block(in);
            if (!blk || cram_write_block(out, blk) != 0) {
                if (blk) cram_free_block(blk);
                goto err;
            }
            cram_free_block(blk);
        }
        cram_free_container(c);
    }

    ret = 0;

 err:
    // 'c' is only non-NULL here when we bailed out mid-container.
    if (c)
        cram_free_container(c);

    if (hts_close(h_out) != 0)
        ret = -1;

    return ret;
}
Пример #2
0
/*
 * samview test driver: reads a SAM/BAM/CRAM file (optionally restricted to
 * regions) and writes it to stdout as SAM, BAM (-b / -l) or CRAM (-C).
 *
 * Options:
 *   -S        input is SAM           -b        output BAM
 *   -D        input is CRAM          -C        output CRAM
 *   -l level  BAM compression level  -t ref    reference file for CRAM
 *   -I        ignore SAM errors      -B        benchmark (no output written)
 *   -i opt    input option           -o opt    output option
 *   -N num    stop after num reads
 *
 * Returns 0 on success and non-zero on any failure.
 */
int main(int argc, char *argv[])
{
    samFile *in;
    char *fn_ref = 0;
    int flag = 0, c, clevel = -1, ignore_sam_err = 0;
    char moder[8];
    bam_hdr_t *h;
    bam1_t *b;
    htsFile *out;
    char modew[8];
    int r = 0, exit_code = 0;
    hts_opt *in_opts = NULL, *out_opts = NULL, *last = NULL;
    int nreads = 0;
    int benchmark = 0;

    while ((c = getopt(argc, argv, "IbDCSl:t:i:o:N:B")) >= 0) {
        switch (c) {
        case 'S': flag |= 1; break;
        case 'b': flag |= 2; break;
        case 'D': flag |= 4; break;
        case 'C': flag |= 8; break;
        case 'B': benchmark = 1; break;
        case 'l': clevel = atoi(optarg); flag |= 2; break;
        case 't': fn_ref = optarg; break;
        case 'I': ignore_sam_err = 1; break;
        case 'i': if (add_option(&in_opts,  optarg)) return 1; break;
        case 'o': if (add_option(&out_opts, optarg)) return 1; break;
        case 'N': nreads = atoi(optarg); break;
        }
    }
    if (argc == optind) {
        fprintf(stderr, "Usage: samview [-bSCSIB] [-N num_reads] [-l level] [-o option=value] <in.bam>|<in.sam>|<in.cram> [region]\n");
        return 1;
    }

    // Build the read mode: "r" (SAM), "rb" (BAM) or "rc" (CRAM).
    strcpy(moder, "r");
    if (flag&4) strcat(moder, "c");
    else if ((flag&1) == 0) strcat(moder, "b");

    in = sam_open(argv[optind], moder);
    if (in == NULL) {
        fprintf(stderr, "Error opening \"%s\"\n", argv[optind]);
        return EXIT_FAILURE;
    }
    h = sam_hdr_read(in);
    if (h == NULL) {
        // The header is dereferenced immediately below, so bail out now.
        fprintf(stderr, "Couldn't read header for \"%s\"\n", argv[optind]);
        return EXIT_FAILURE;
    }
    h->ignore_sam_err = ignore_sam_err;
    b = bam_init1();
    if (b == NULL) {
        fprintf(stderr, "Out of memory allocating BAM struct\n");
        return EXIT_FAILURE;
    }

    // Build the write mode: "w", optional level digit, then "c" or "b".
    strcpy(modew, "w");
    if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
    if (flag&8) strcat(modew, "c");
    else if (flag&2) strcat(modew, "b");
    out = hts_open("-", modew);
    if (out == NULL) {
        fprintf(stderr, "Error opening standard output\n");
        return EXIT_FAILURE;
    }

    /* CRAM output */
    if (flag & 8) {
        int ret;

        // Parse input header and use for CRAM output
        out->fp.cram->header = sam_hdr_parse_(h->text, h->l_text);

        // Create CRAM references arrays
        if (fn_ref)
            ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, fn_ref);
        else
            // Attempt to fill out a cram->refs[] array from @SQ headers
            ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, NULL);

        if (ret != 0)
            return EXIT_FAILURE;
    }

    // Process any options; currently cram only.
    // Each list node is freed as soon as it has been applied.
    for (; in_opts;  in_opts = (last=in_opts)->next, free(last)) {
        hts_set_opt(in,  in_opts->opt,  in_opts->val);
        if (in_opts->opt == CRAM_OPT_REFERENCE)
            if (hts_set_opt(out,  in_opts->opt,  in_opts->val) != 0)
                return EXIT_FAILURE;
    }
    for (; out_opts;  out_opts = (last=out_opts)->next, free(last))
        if (hts_set_opt(out, out_opts->opt,  out_opts->val) != 0)
            return EXIT_FAILURE;

    if (!benchmark && sam_hdr_write(out, h) < 0) {
        fprintf(stderr, "Error writing output header.\n");
        exit_code = 1;
    }
    if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region
        int i;
        hts_idx_t *idx;
        if ((idx = sam_index_load(in, argv[optind])) == 0) {
            fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
            return 1;
        }
        for (i = optind + 1; i < argc; ++i) {
            hts_itr_t *iter;
            if ((iter = sam_itr_querys(idx, h, argv[i])) == 0) {
                fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]);
                continue;
            }
            while ((r = sam_itr_next(in, iter, b)) >= 0) {
                if (!benchmark && sam_write1(out, h, b) < 0) {
                    fprintf(stderr, "Error writing output.\n");
                    exit_code = 1;
                    break;
                }
                if (nreads && --nreads == 0)
                    break;
            }
            hts_itr_destroy(iter);
        }
        hts_idx_destroy(idx);
    } else while ((r = sam_read1(in, h, b)) >= 0) {
        if (!benchmark && sam_write1(out, h, b) < 0) {
            fprintf(stderr, "Error writing output.\n");
            exit_code = 1;
            break;
        }
        if (nreads && --nreads == 0)
            break;
    }

    // -1 is the normal end-of-data result; anything lower is an error.
    if (r < -1) {
        fprintf(stderr, "Error parsing input.\n");
        exit_code = 1;
    }

    r = sam_close(out);
    if (r < 0) {
        fprintf(stderr, "Error closing output.\n");
        exit_code = 1;
    }

    bam_destroy1(b);
    bam_hdr_destroy(h);

    r = sam_close(in);
    if (r < 0) {
        fprintf(stderr, "Error closing input.\n");
        exit_code = 1;
    }

    return exit_code;
}
Пример #3
0
/*
 * Merges one or more SAM/BAM/CRAM input files into a single stream
 * written to stdout.
 *
 * One pending record is held per input.  On every step the record with the
 * smallest (reference id, position, strand, read number) key is written and
 * replaced by the next record from the same input.
 *
 * Returns 0 on success, 1 on failure.
 */
int main(int argc, char **argv) {
    scram_fd **in, *out;
    int n_input, i;
    bam_seq_t **s;
    char imode[10], *in_f = "", omode[10], *out_f = "";
    int level = '\0'; // nul terminate string => auto level
    int c, verbose = 0;
    int s_opt = 0, S_opt = 0, embed_ref = 0;
    char *ref_fn = NULL;
    int start, end;
    char ref_name[1024] = {0};
    refs_t *refs = NULL;

    /* Parse command line arguments */
    while ((c = getopt(argc, argv, "u0123456789hvs:S:V:r:XI:O:R:")) != -1) {
        switch (c) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            level = c;
            break;

        case 'u':
            level = '0';
            break;

        case 'h':
            usage(stdout);
            return 0;

        case 'v':
            verbose++;
            break;

        case 's':
            s_opt = atoi(optarg);
            break;

        case 'S':
            S_opt = atoi(optarg);
            break;

        case 'V':
            cram_set_option(NULL, CRAM_OPT_VERSION, optarg);
            break;

        case 'r':
            ref_fn = optarg;
            break;

        case 'X':
            embed_ref = 1;
            break;

        case 'I':
            in_f = parse_format(optarg);
            break;

        case 'O':
            out_f = parse_format(optarg);
            break;

        case 'R': {
            // "name" or "name:start[-end]"; the name is cut at the colon.
            char *cp = strchr(optarg, ':');
            if (cp) {
                *cp = 0;
                switch (sscanf(cp+1, "%d-%d", &start, &end)) {
                case 1:
                    end = start;
                    break;
                case 2:
                    break;
                default:
                    fprintf(stderr, "Malformed range format\n");
                    return 1;
                }
            } else {
                start = INT_MIN;
                end   = INT_MAX;
            }
            // ref_name is zero-filled, so copying at most 1023 bytes
            // always leaves it NUL terminated.
            strncpy(ref_name, optarg, 1023);
            break;
        }

        case '?':
            fprintf(stderr, "Unrecognised option: -%c\n", optopt);
            usage(stderr);
            return 1;
        }
    }

    /* Open output file */
    snprintf(omode, sizeof omode, "w%s%c", out_f, level);
    if (!(out = scram_open("-", omode))) {
        // Output always goes to stdout; there is no file name argument to
        // report here (argv[optind+1] may lie beyond the end of argv).
        fprintf(stderr, "Failed to open output stream (stdout)\n");
        return 1;
    }

    /* Open multiple input files */
    snprintf(imode, sizeof imode, "r%s%c", in_f, level);
    n_input = argc - optind;
    if (!n_input) {
        fprintf(stderr, "No input files specified.\n");
        return 1;
    }
    if (!(in = malloc(n_input * sizeof(*in))))
        return 1;
    if (!(s = malloc(n_input * sizeof(*s))))
        return 1;
    for (i = 0; i < n_input; i++, optind++) {
        s[i] = NULL;
        if (*in_f == 0)
            snprintf(imode, sizeof imode, "r%s%c",
                     detect_format(argv[optind]), level);
        if (!(in[i] = scram_open(argv[optind], imode))) {
            fprintf(stderr, "Failed to open bam file %s\n", argv[optind]);
            return 1;
        }
        if (i && !hdr_compare(scram_get_header(in[0]),
                              scram_get_header(in[i]))) {
            fprintf(stderr, "Incompatible reference sequence list.\n");
            fprintf(stderr, "Currently the @SQ lines need to be identical"
                    " in all files.\n");
            return 1;
        }

        // The first input that has references supplies them to the rest.
        if (!refs && scram_get_refs(in[i]))
            refs = scram_get_refs(in[i]);

        if (refs && scram_set_option(in[i], CRAM_OPT_SHARED_REF, refs))
            return 1;
    }

    /* Set any format specific options */
    if (refs)
        scram_set_option(out, CRAM_OPT_SHARED_REF, refs);

    if (scram_set_option(out, CRAM_OPT_VERBOSITY, verbose))
        return 1;
    if (s_opt)
        if (scram_set_option(out, CRAM_OPT_SEQS_PER_SLICE, s_opt))
            return 1;

    if (S_opt)
        if (scram_set_option(out, CRAM_OPT_SLICES_PER_CONTAINER, S_opt))
            return 1;

    if (embed_ref)
        if (scram_set_option(out, CRAM_OPT_EMBED_REF, embed_ref))
            return 1;

    /* Copy header and refs from in to out, for writing purposes */
    // FIXME: do proper merging of @PG lines
    // FIXME: track mapping of old PG aux name to new PG aux name per seq
    scram_set_header(out, sam_hdr_dup(scram_get_header(in[0])));

    // Needs doing after loading the header.
    if (ref_fn)
        if (scram_set_option(out, CRAM_OPT_REFERENCE, ref_fn))
            return 1;

    if (scram_get_header(in[0])) {
        if (scram_write_header(out))
            return 1;
    }


    /* Do the actual file format conversion */
    fprintf(stderr, "Opening and loading initial seqs\n");
    for (i = 0; i < n_input; i++) {
        if (scram_get_seq(in[i], &s[i]) < 0) {
            // Nothing to read from this input: retire it straight away.
            if (scram_close(in[i]))
                return 1;
            in[i] = NULL;
            free(s[i]);
            s[i] = NULL;
            continue;
        }
    }

    fprintf(stderr, "Merging...\n");
    for (;;) {
        uint64_t best_val = 0;
        int best_j = -1, j;   // -1 => no open input seen yet

        for (j = 0; j < n_input; j++) {
            bam_seq_t *b = s[j];
            uint64_t x;
            if (!in[j])
                continue;

            // Widen before shifting so large positions cannot overflow
            // an int and negative values are not shifted as signed.
            x = (((uint64_t)bam_ref(b))<<33)
                | ((uint64_t)(bam_pos(b))<<2)
                | ((uint64_t)(bam_strand(b))<<1)
                | !(bam_flag(b) & BAM_FREAD1);

            // An explicit "found" test is used instead of a sentinel key:
            // a key with its top bit set (e.g. a negative reference id)
            // must still be selectable, otherwise those records are lost.
            if (best_j < 0 || best_val > x) {
                best_val = x;
                best_j = j;
            }
        }

        if (best_j < 0) { // all closed
            break;
        }

        if (-1 == scram_put_seq(out, s[best_j]))
            return 1;

        if (scram_get_seq(in[best_j], &s[best_j]) < 0) {
            if (scram_close(in[best_j]))
                return 1;
            in[best_j] = NULL;
            free(s[best_j]);
            s[best_j] = NULL;
        }
    }

    for (i = 0; i < n_input; i++) {
        if (!in[i])
            continue;
        scram_close(in[i]);
        free(s[i]);
    }

    /* Finally tidy up and close files */
    if (scram_close(out))
        return 1;
    free(in);
    free(s);

    return 0;
}
Пример #4
0
/*
 * Converts a CRAM file to SAM/BAM.
 *
 * Usage: <prog> [options] in.cram [out]
 * With a single file argument the output is written to stdout.
 *
 * Options:
 *   -b        BAM output                -u / -0..-9  compression level
 *   -m        decode MD                 -p prefix    CRAM_OPT_PREFIX value
 *   -r ref    reference file            -X           embedded reference
 *   -R range  "name" or "name:start[-end]" sub-range
 *
 * Returns 0 on success, 1 on failure.
 */
int main(int argc, char **argv) {
    cram_fd *fd;
    bam_file_t *bfd;
    bam_seq_t *bam = NULL;
    char mode[4] = {'w', '\0', '\0', '\0'};
    char *prefix = NULL;
    int decode_md = 0;
    int C;
    int start = INT_MIN, end = INT_MAX;
    char ref_name[1024] = {0}, *arg_list, *ref_fn = NULL;
    int embed_ref = 0;

    while ((C = getopt(argc, argv, "bu0123456789mp:hr:R:X")) != -1) {
        switch (C) {
        case 'b':
            mode[1] = 'b';
            break;

        case 'u':
            mode[2] = '0';
            break;

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            mode[2] = C;
            break;

        case 'm':
            decode_md = 1;
            break;

        case 'p':
            prefix = optarg;
            break;

        case 'h':
            usage(stdout);
            return 0;

        case 'r':
            ref_fn = optarg;
            break;

        case 'X':
            embed_ref = 1;
            break;

        case 'R': {
            // "name" or "name:start[-end]"; the name is cut at the colon.
            char *cp = strchr(optarg, ':');
            if (cp) {
                *cp = 0;
                switch (sscanf(cp+1, "%d-%d", &start, &end)) {
                case 1:
                    end = start;
                    break;
                case 2:
                    break;
                default:
                    fprintf(stderr, "Malformed range format\n");
                    return 1;
                }
            } else {
                start = INT_MIN;
                end   = INT_MAX;
            }
            // ref_name is zero-filled, so copying at most 1023 bytes
            // always leaves it NUL terminated.
            strncpy(ref_name, optarg, 1023);
            break;
        }

        case '?':
            fprintf(stderr, "Unrecognised option: -%c\n", optopt);
            usage(stderr);
            return 1;
        }
    }

    if (argc - optind != 1 && argc - optind != 2) {
        usage(stderr);
        return 1;
    }

    if (argc - optind == 1) {
        if (NULL == (bfd = bam_open("-", mode))) {
            fprintf(stderr, "Failed to open SAM/BAM output.\n");
            return 1;
        }
    } else {
        if (NULL == (bfd = bam_open(argv[optind+1], mode))) {
            fprintf(stderr, "Failed to open SAM/BAM output.\n");
            perror(argv[optind+1]);
            return 1;
        }
    }

    if (NULL == (fd = cram_open(argv[optind], "rb"))) {
        fprintf(stderr, "Error opening CRAM file '%s'.\n", argv[optind]);
        return 1;
    }

    // The index is only needed when a sub-range was requested.
    if (*ref_name != 0)
        cram_index_load(fd, argv[optind]);

    if (prefix)
        if (cram_set_option(fd, CRAM_OPT_PREFIX, prefix))
            return 1;

    if (decode_md)
        if (cram_set_option(fd, CRAM_OPT_DECODE_MD, decode_md))
            return 1;

    if (embed_ref)
        if (cram_set_option(fd, CRAM_OPT_EMBED_REF, embed_ref))
            return 1;

    /* Find and load reference */
    cram_load_reference(fd, ref_fn);
    if (!fd->refs && !embed_ref) {
        fprintf(stderr, "Unable to find an appropriate reference.\n"
                "Please specify a valid reference with -r ref.fa option.\n");
        return 1;
    }

    // The output borrows the CRAM header; it is detached again before
    // bam_close() at the end of the function.
    bfd->header = fd->header;

    if (*ref_name != 0) {
        cram_range r;
        int refid = sam_hdr_name2ref(fd->header, ref_name);

        if (refid == -1 && *ref_name != '*') {
            fprintf(stderr, "Unknown reference name '%s'\n", ref_name);
            return 1;
        }
        r.refid = refid;
        r.start = start;
        r.end = end;
        if (cram_set_option(fd, CRAM_OPT_RANGE, &r))
            return 1;
    }

    /* SAM Header */
    if (!(arg_list = stringify_argv(argc, argv)))
        return 1;
    sam_hdr_add_PG(bfd->header, "cram_to_sam",
                   "VN", PACKAGE_VERSION,
                   "CL", arg_list, NULL);
    free(arg_list);

    bam_write_header(bfd);

    while (cram_get_bam_seq(fd, &bam) == 0) {
        bam_put_seq(bfd, bam);
    }

    // The loop above also stops on a decode error; tell the two apart.
    if (!cram_eof(fd)) {
        fprintf(stderr, "Error while reading file\n");
        return 1;
    }

    cram_close(fd);

    bfd->header = NULL;
    bam_close(bfd);

    free(bam);

    return 0;
}
Пример #5
0
/*
 * scramble: converts between SAM, BAM and CRAM.
 *
 * Usage: scramble [options] [input_file [output_file]]
 * A missing input_file means stdin, a missing output_file means stdout.
 * Formats come from -I / -O or, failing that, from detect_format() on the
 * file name.
 *
 * Returns 0 on success, 1 on failure.
 */
int main(int argc, char **argv) {
    scram_fd *in, *out;
    bam_seq_t *s;
    char imode[10], *in_f = "", omode[10], *out_f = "";
    int level = '\0'; // nul terminate string => auto level
    int c, verbose = 0;
    int s_opt = 0, S_opt = 0, embed_ref = 0, ignore_md5 = 0, decode_md = 0;
    char *ref_fn = NULL;
    int start = INT_MIN, end = INT_MAX, multi_seq = -1, no_ref = 0;
    int use_bz2 = 0, use_arith = 0, use_lzma = 0;
    char ref_name[1024] = {0};
    refs_t *refs;
    int nthreads = 1;
    t_pool *p = NULL;
    int max_reads = -1;
    enum quality_binning binning = BINNING_NONE;

    /* Parse command line arguments */
    while ((c = getopt(argc, argv, "u0123456789hvs:S:V:r:xXeI:O:R:!MmjJZt:BN:")) != -1) {
        switch (c) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            level = c;
            break;

        case 'u':
            level = '0';
            break;

        case 'h':
            usage(stdout);
            return 0;

        case 'v':
            verbose++;
            break;

        case 's':
            s_opt = atoi(optarg);
            break;

        case 'S':
            S_opt = atoi(optarg);
            break;

        case 'm':
            decode_md = 1;
            break;

        case 'V':
            if (cram_set_option(NULL, CRAM_OPT_VERSION, optarg))
                return 1;
            break;

        case 'r':
            ref_fn = optarg;
            break;

        case 'X':
            fprintf(stderr, "-X is deprecated in favour of -e.\n");
            /* fallthrough: -X is an alias for -e */
        case 'e':
            embed_ref = 1;
            break;

        case 'x':
            no_ref = 1;
            break;

        case 'I':
            in_f = parse_format(optarg);
            break;

        case 'O':
            out_f = parse_format(optarg);
            break;

        case 'R': {
            // "name" or "name:start[-end]"; the name is cut at the colon.
            char *cp = strchr(optarg, ':');
            if (cp) {
                *cp = 0;
                switch (sscanf(cp+1, "%d-%d", &start, &end)) {
                case 1:
                    end = start;
                    break;
                case 2:
                    break;
                default:
                    fprintf(stderr, "Malformed range format\n");
                    return 1;
                }
            } else {
                start = INT_MIN;
                end   = INT_MAX;
            }
            // ref_name is zero-filled, so copying at most 1023 bytes
            // always leaves it NUL terminated.
            strncpy(ref_name, optarg, 1023);
            break;
        }

        case '!':
            ignore_md5 = 1;
            break;

        case 'M':
            multi_seq = 1;
            break;

        case 'j':
#ifdef HAVE_LIBBZ2
            use_bz2 = 1;
#else
            fprintf(stderr, "Warning: bzip2 support is not compiled into this"
                    " version.\nPlease recompile.\n");
#endif
            break;

        case 'J':
            use_arith = 1;
            break;

        case 'Z':
#ifdef HAVE_LIBLZMA
            use_lzma = 1;
#else
            fprintf(stderr, "Warning: lzma support is not compiled into this"
                    " version.\nPlease recompile.\n");
#endif
            break;

        case 't':
            nthreads = atoi(optarg);
            if (nthreads < 1) {
                fprintf(stderr, "Number of threads needs to be >= 1\n");
                return 1;
            }
            break;

        case 'B':
            binning = BINNING_ILLUMINA;
            break;

        case 'N': // For debugging
            max_reads = atoi(optarg);
            break;

        case '?':
            fprintf(stderr, "Unrecognised option: -%c\n", optopt);
            usage(stderr);
            return 1;
        }
    }

    if (argc - optind > 2) {
        fprintf(stderr, "Usage: scramble [input_file [output_file]]\n");
        return 1;
    }


    /* Open up input and output files */
    snprintf(imode, sizeof imode, "r%s%c", in_f, level);
    if (argc - optind > 0) {
        if (*in_f == 0)
            snprintf(imode, sizeof imode, "r%s%c",
                     detect_format(argv[optind]), level);
        if (!(in = scram_open(argv[optind], imode))) {
            fprintf(stderr, "Failed to open file %s\n", argv[optind]);
            return 1;
        }
    } else {
        if (!(in = scram_open("-", imode))) {
            // No file name argument exists here; argv[optind] is NULL.
            fprintf(stderr, "Failed to open file %s\n", "(stdin)");
            return 1;
        }
    }
    if (!in->is_bam && ref_fn) {
        cram_load_reference(in->c, ref_fn);
        if (!in->c->refs && !embed_ref) {
            fprintf(stderr, "Unable to find an appropriate reference.\n"
                    "Please specify a valid reference with "
                    "-r ref.fa option.\n");
            return 1;
        }
    }

    snprintf(omode, sizeof omode, "w%s%c", out_f, level);
    if (argc - optind > 1) {
        if (*out_f == 0)
            snprintf(omode, sizeof omode, "w%s%c",
                     detect_format(argv[optind+1]), level);
        if (!(out = scram_open(argv[optind+1], omode))) {
            fprintf(stderr, "Failed to open file %s\n", argv[optind+1]);
            return 1;
        }
    } else {
        if (!(out = scram_open("-", omode))) {
            // No output file name exists here; argv[optind+1] is NULL or
            // lies beyond the end of argv.
            fprintf(stderr, "Failed to open file %s\n", "(stdout)");
            return 1;
        }
    }


    /* Set any format specific options */
    scram_set_refs(out, refs = scram_get_refs(in));

    scram_set_option(out, CRAM_OPT_VERBOSITY, verbose);
    if (s_opt)
        if (scram_set_option(out, CRAM_OPT_SEQS_PER_SLICE, s_opt))
            return 1;

    if (S_opt)
        if (scram_set_option(out, CRAM_OPT_SLICES_PER_CONTAINER, S_opt))
            return 1;

    if (embed_ref)
        if (scram_set_option(out, CRAM_OPT_EMBED_REF, embed_ref))
            return 1;

    if (use_bz2)
        if (scram_set_option(out, CRAM_OPT_USE_BZIP2, use_bz2))
            return 1;

    if (use_arith)
        if (scram_set_option(out, CRAM_OPT_USE_ARITH, use_arith))
            return 1;

    if (use_lzma)
        if (scram_set_option(out, CRAM_OPT_USE_LZMA, use_lzma))
            return 1;

    if (binning != BINNING_NONE)
        if (scram_set_option(out, CRAM_OPT_BINNING, binning))
            return 1;

    if (no_ref)
        if (scram_set_option(out, CRAM_OPT_NO_REF, no_ref))
            return 1;

    // multi_seq defaults to -1, so this option is always passed on
    // unless a caller explicitly zeroes it.
    if (multi_seq)
        if (scram_set_option(out, CRAM_OPT_MULTI_SEQ_PER_SLICE, multi_seq))
            return 1;

    if (decode_md) {
        if (no_ref) {
            fprintf(stderr, "Cannot use -m in conjunction with -x.\n");
            return 1;
        }
        if (scram_set_option(in, CRAM_OPT_DECODE_MD, decode_md))
            return 1;
    }

    if (nthreads > 1) {
        if (NULL == (p = t_pool_init(nthreads*2, nthreads)))
            return 1;

        if (scram_set_option(in,  CRAM_OPT_THREAD_POOL, p))
            return 1;
        if (scram_set_option(out, CRAM_OPT_THREAD_POOL, p))
            return 1;
    }

    if (ignore_md5)
        if (scram_set_option(in, CRAM_OPT_IGNORE_MD5, ignore_md5))
            return 1;


    /* Copy header and refs from in to out, for writing purposes */
    scram_set_header(out, scram_get_header(in));

    // Needs doing after loading the header.
    if (ref_fn) {
        if (scram_set_option(out, CRAM_OPT_REFERENCE, ref_fn))
            return 1;
    } else {
        // Attempt to fill out a cram->refs[] array from @SQ headers
        scram_set_option(out, CRAM_OPT_REFERENCE, NULL);
    }

    if (scram_get_header(out)) {
        char *arg_list = stringify_argv(argc, argv);
        int pg_failed;

        if (!arg_list)
            return 1;

        pg_failed = sam_hdr_add_PG(scram_get_header(out), "scramble",
                                   "VN", PACKAGE_VERSION,
                                   "CL", arg_list, NULL);
        // Release the command line on every path, including failures.
        free(arg_list);
        if (pg_failed)
            return 1;

        if (scram_write_header(out))
            return 1;
    }


    /* Support for sub-range queries, currently implemented for CRAM only */
    if (*ref_name != 0) {
        cram_range r;
        int refid;

        if (in->is_bam) {
            fprintf(stderr, "Currently the -R option is only implemented for CRAM indices\n");
            return 1;
        }

        // The index is located from the input file name, so input read
        // from stdin (no file argument) cannot be range-queried.
        if (argc - optind < 1) {
            fprintf(stderr, "The -R option requires a named input file\n");
            return 1;
        }

        cram_index_load(in->c, argv[optind]);

        refid = sam_hdr_name2ref(in->c->header, ref_name);

        if (refid == -1 && *ref_name != '*') {
            fprintf(stderr, "Unknown reference name '%s'\n", ref_name);
            return 1;
        }
        r.refid = refid;
        r.start = start;
        r.end = end;
        if (scram_set_option(in, CRAM_OPT_RANGE, &r))
            return 1;
    }

    /* Do the actual file format conversion */
    s = NULL;

    while (scram_get_seq(in, &s) >= 0) {
        if (-1 == scram_put_seq(out, s)) {
            fprintf(stderr, "Failed to encode sequence\n");
            return 1;
        }
        if (max_reads >= 0)
            if (--max_reads == 0)
                break;
    }

    // Only inspect the EOF state when no read limit cut the loop short.
    if (max_reads == -1) {
        switch(scram_eof(in)) {
        case 0:
            fprintf(stderr, "Failed to decode sequence\n");
            return 1;
        case 2:
            fprintf(stderr, "Warning: no end-of-file block identified. "
                    "File may be truncated.\n");
            break;
        case 1: default:
            // expected case
            break;
        }
    }

    /* Finally tidy up and close files */
    if (scram_close(in))
        return 1;
    if (scram_close(out))
        return 1;

    if (p)
        t_pool_destroy(p, 0);

    free(s);

    return 0;
}
Пример #6
0
/*
 * Converts a SAM/BAM file to CRAM.
 *
 * Usage: <prog> [options] in.bam [out.cram]
 * With a single file argument the CRAM output is written to stdout.
 *
 * Options:
 *   -u / -0..-9  compression level    -v       increase verbosity
 *   -s num       sequences per slice  -S num   slices per container
 *   -V version   CRAM version         -r ref   reference file
 *   -X           embed reference
 *
 * Returns 0 on success, 1 on failure.
 */
int main(int argc, char **argv) {
    cram_fd *out;
    bam_file_t *in;
    bam_seq_t *s = NULL;
    char *out_fn;
    int level = '\0'; // nul terminate string => auto level
    char out_mode[4];
    int c, verbose = 0;
    int s_opt = 0, S_opt = 0, embed_ref = 0;
    char *arg_list, *ref_fn = NULL;
    int r;

    while ((c = getopt(argc, argv, "u0123456789hvs:S:V:r:X")) != -1) {
        switch (c) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            level = c;
            break;

        case 'u':
            level = '0';
            break;

        case 'h':
            usage(stdout);
            return 0;

        case 'v':
            verbose++;
            break;

        case 's':
            s_opt = atoi(optarg);
            break;

        case 'S':
            S_opt = atoi(optarg);
            break;

        case 'V':
            if (cram_set_option(NULL, CRAM_OPT_VERSION, optarg))
                return 1;
            break;

        case 'r':
            ref_fn = optarg;
            break;

        case 'X':
            embed_ref = 1;
            break;

        case '?':
            fprintf(stderr, "Unrecognised option: -%c\n", optopt);
            usage(stderr);
            return 1;
        }
    }

    if (argc - optind != 1 && argc - optind != 2) {
        usage(stderr);
        return 1;
    }

    /* opening */
    if (NULL == (in = bam_open(argv[optind], "rb"))) {
        perror(argv[optind]);
        return 1;
    }

    out_fn = argc - optind == 2 ? argv[optind+1] : "-";
    // "wb" + one level character + NUL exactly fills out_mode[4].
    snprintf(out_mode, sizeof out_mode, "wb%c", level);
    if (NULL == (out = cram_open(out_fn, out_mode))) {
        fprintf(stderr, "Error opening CRAM file '%s'.\n", out_fn);
        return 1;
    }

    /* SAM Header */
    if (!(arg_list = stringify_argv(argc, argv)))
        return 1;
    sam_hdr_add_PG(in->header, "sam_to_cram",
                   "VN", PACKAGE_VERSION,
                   "CL", arg_list, NULL);
    free(arg_list);

    /* Find and load reference */
    if (!ref_fn) {
        // No -r given: fall back to the UR tag of an @SQ header line.
        SAM_hdr_type *ty = sam_hdr_find(in->header, "SQ", NULL, NULL);
        if (ty) {
            SAM_hdr_tag *tag;

            if ((tag = sam_hdr_find_key(in->header, ty, "UR", NULL))) {
                // Skip the 3-character "UR:" tag prefix, then any
                // leading "file:" scheme.
                ref_fn  = tag->str + 3;
                if (strncmp(ref_fn, "file:", 5) == 0)
                    ref_fn += 5;
            }
        }
    }

    // The output borrows the input header; it is detached again before
    // cram_close() because bam_close() frees it.
    out->header = in->header;
    if (ref_fn)
        cram_load_reference(out, ref_fn);

    if (!out->refs) {
        fprintf(stderr, "Unable to open reference.\n"
                "Please specify a valid reference with -r ref.fa option.\n");
        return 1;
    }
    refs2id(out->refs, out->header);

    if (-1 == cram_write_SAM_hdr(out, in->header))
        return 1;

    if (cram_set_option(out, CRAM_OPT_VERBOSITY, verbose))
        return 1;
    if (s_opt)
        if (cram_set_option(out, CRAM_OPT_SEQS_PER_SLICE, s_opt))
            return 1;

    if (S_opt)
        if (cram_set_option(out, CRAM_OPT_SLICES_PER_CONTAINER, S_opt))
            return 1;

    if (embed_ref)
        if (cram_set_option(out, CRAM_OPT_EMBED_REF, embed_ref))
            return 1;

    /* Sequence iterators */
    while ((r = bam_get_seq(in, &s)) > 0) {
        if (-1 == cram_put_bam_seq(out, s)) {
            fprintf(stderr, "Failed in cram_put_bam_seq()\n");
            return 1;
        }
    }
    // NOTE(review): a negative result is taken to be a read error rather
    // than end of file -- confirm against the bam_get_seq() contract.
    if (r < 0) {
        fprintf(stderr, "Failed in bam_get_seq()\n");
        return 1;
    }

    bam_close(in);
    out->header = NULL; // freed by bam_close()
    if (-1 == cram_close(out)) {
        fprintf(stderr, "Failed in cram_close()\n");
        return 1;
    }

    free(s);

    return 0;
}
Пример #7
0
/*
 * samview test driver: reads a SAM/BAM/CRAM file (optionally restricted to
 * regions) and writes it to stdout as SAM, BAM (-b / -l) or CRAM (-C).
 *
 * Options:
 *   -S        input is SAM           -b        output BAM
 *   -D        input is CRAM          -C        output CRAM
 *   -l level  BAM compression level  -t ref    reference file for CRAM
 *   -I        ignore SAM errors
 *
 * Returns 0 on success and 1 on any failure.
 */
int main(int argc, char *argv[])
{
    samFile *in;
    char *fn_ref = 0;
    int flag = 0, c, clevel = -1, ignore_sam_err = 0;
    char moder[8];
    bam_hdr_t *h;
    bam1_t *b;
    htsFile *out;
    char modew[8];
    int r = 0, exit_code = 0;

    while ((c = getopt(argc, argv, "IbDCSl:t:")) >= 0) {
        switch (c) {
        case 'S': flag |= 1; break;
        case 'b': flag |= 2; break;
        case 'D': flag |= 4; break;
        case 'C': flag |= 8; break;
        case 'l': clevel = atoi(optarg); flag |= 2; break;
        case 't': fn_ref = optarg; break;
        case 'I': ignore_sam_err = 1; break;
        }
    }
    if (argc == optind) {
        fprintf(stderr, "Usage: samview [-bSCSI] [-l level] <in.bam>|<in.sam>|<in.cram> [region]\n");
        return 1;
    }

    // Build the read mode: "r" (SAM), "rb" (BAM) or "rc" (CRAM).
    strcpy(moder, "r");
    if (flag&4) strcat(moder, "c");
    else if ((flag&1) == 0) strcat(moder, "b");

    in = sam_open(argv[optind], moder);
    if (in == NULL) {
        fprintf(stderr, "Error opening \"%s\"\n", argv[optind]);
        return 1;
    }
    h = sam_hdr_read(in);
    if (h == NULL) {
        // The header is dereferenced immediately below, so bail out now.
        fprintf(stderr, "Couldn't read header for \"%s\"\n", argv[optind]);
        return 1;
    }
    h->ignore_sam_err = ignore_sam_err;
    b = bam_init1();
    if (b == NULL) {
        fprintf(stderr, "Out of memory allocating BAM struct\n");
        return 1;
    }

    // Build the write mode: "w", optional level digit, then "c" or "b".
    strcpy(modew, "w");
    if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
    if (flag&8) strcat(modew, "c");
    else if (flag&2) strcat(modew, "b");
    out = hts_open("-", modew);
    if (out == NULL) {
        fprintf(stderr, "Error opening standard output\n");
        return 1;
    }

    /* CRAM output */
    if (flag & 8) {
        int ret;

        // Parse input header and use for CRAM output
        out->fp.cram->header = sam_hdr_parse_(h->text, h->l_text);

        // Create CRAM references arrays
        if (fn_ref)
            ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, fn_ref);
        else
            // Attempt to fill out a cram->refs[] array from @SQ headers
            ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, NULL);

        if (ret != 0)
            return 1;
    }

    if (sam_hdr_write(out, h) < 0) {
        fprintf(stderr, "Error writing output header.\n");
        exit_code = 1;
    }
    if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region
        int i;
        hts_idx_t *idx;
        if ((idx = bam_index_load(argv[optind])) == 0) {
            fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
            return 1;
        }
        for (i = optind + 1; i < argc; ++i) {
            hts_itr_t *iter;
            if ((iter = bam_itr_querys(idx, h, argv[i])) == 0) {
                fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]);
                continue;
            }
            while ((r = bam_itr_next(in, iter, b)) >= 0) {
                if (sam_write1(out, h, b) < 0) {
                    fprintf(stderr, "Error writing output.\n");
                    exit_code = 1;
                    break;
                }
            }
            hts_itr_destroy(iter);
        }
        hts_idx_destroy(idx);
    } else while ((r = sam_read1(in, h, b)) >= 0) {
        if (sam_write1(out, h, b) < 0) {
            fprintf(stderr, "Error writing output.\n");
            exit_code = 1;
            break;
        }
    }

    // Closing flushes the output, so a failure here means lost data.
    if (sam_close(out) < 0) {
        fprintf(stderr, "Error closing output.\n");
        exit_code = 1;
    }

    // -1 is the normal end-of-data result; anything lower is an error.
    if (r < -1) {
        fprintf(stderr, "Error parsing input.\n");
        exit_code = 1;
    }

    bam_destroy1(b);
    bam_hdr_destroy(h);
    if (sam_close(in) < 0) {
        fprintf(stderr, "Error closing input.\n");
        exit_code = 1;
    }
    return exit_code;
}
Пример #8
0
/*
 * CRAM files don't store the RG:Z:ID per read in the aux field.
 * Instead they have a numerical data series (RG) to point each read
 * back to the Nth @RG line in the file.  This means that we may need
 * to edit the RG data series (if the files were produced from
 * "samtools split" for example).
 *
 * The encoding method is stored in the compression header. Typical
 * examples:
 *
 * RG => EXTERNAL {18}           # Block content-id 18 holds RG values
 *                               # as a series of ITF8 encoded values
 *
 * RG => HUFFMAN {1, 255, 255, 255, 255, 255, 1, 0}
 *                               # One RG value #-1.  (No RG)
 *
 * RG => HUFFMAN {1, 0, 1, 0}    # One RG value #0 (always first RG)
 *
 * RG => HUFFMAN {2, 0, 1, 2, 1, 1}
 *                               # Two RG values, #0 and #1, written
 *                               # to the CORE block and possibly
 *                               # mixed with other data series.
 *
 * A single value can (but may not be) implemented as a zero bit
 * huffman code.  In this situation we can change the meta-data in the
 * compression header to renumber an RG value.
 *
 * nfn      Number of input file names in 'fn'.
 * fn       Input CRAM file names, concatenated in order.
 * h        Header passed on to cram_cat_check_hdr().
 * outcram  Output CRAM file name.
 *
 * Returns 0 on success, -1 on failure.
 */
int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
{
    samFile *out = NULL;
    samFile *in = NULL;
    cram_fd *out_c;
    cram_container *c = NULL;
    bam_hdr_t *old = NULL;
    int i, vers_maj, vers_min;
    khash_s2i *rg2id = NULL;
    bam_hdr_t *new_h = NULL;
    char vers[100];
    int ret = -1;

    /* Check consistent versioning and compatible headers */
    if (!(new_h = cram_cat_check_hdr(nfn, fn, h, &rg2id, &vers_maj, &vers_min)))
        return -1;

    /* Open the file with cram_vers */
    snprintf(vers, sizeof vers, "%d.%d", vers_maj, vers_min);
    out = sam_open(outcram, "wc");
    if (out == 0) {
        fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outcram);
        goto done;
    }
    out_c = out->fp.cram;
    cram_set_option(out_c, CRAM_OPT_VERSION, vers);
    //fprintf(stderr, "Creating cram vers %s\n", vers);

    cram_fd_set_header(out_c, sam_hdr_parse_(new_h->text,  new_h->l_text)); // needed?
    if (sam_hdr_write(out, new_h) != 0) {
        fprintf(stderr, "[%s] ERROR: fail to write header to '%s'.\n", __func__, outcram);
        goto done;
    }

    for (i = 0; i < nfn; ++i) {
        cram_fd *in_c;
        khash_s2i *rg2id_in;
        int new_rg = -1;

        in = sam_open(fn[i], "rc");
        if (in == 0) {
            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
            goto done;
        }
        in_c = in->fp.cram;

        old = sam_hdr_read(in);
        if (!old) {
            fprintf(stderr, "[%s] ERROR: fail to read the header of file '%s'.\n", __func__, fn[i]);
            goto done;
        }
        rg2id_in = hash_rg(old);
        if (!rg2id_in)
            goto done;

        // Compute RG mapping if suitable for changing.
        if (rg2id_in->n_id == 1) {
            int _;
            new_rg = hash_s2i_inc(rg2id, rg2id_in->id[0], NULL, &_);
        } else {
            new_rg = 0;
        }

        hash_s2i_free(rg2id_in);


        // Copy containers and the blocks within them
        while ((c = cram_read_container(in_c))) {
            cram_block *blk;

            if (cram_container_is_empty(in_c)) {
                if (cram_write_container(out_c, c) != 0)
                    goto done;

                // Container compression header
                if (!(blk = cram_read_block(in_c)))
                    goto done;
                if (cram_write_block(out_c, blk) != 0) {
                    cram_free_block(blk);
                    goto done;
                }
                cram_free_block(blk);
                cram_free_container(c);

                continue;
            }

            // If we have just one RG key and new_rg != 0 then
            // we need to edit the compression header. IF WE CAN.
            if (new_rg) {
                int zero = 0;
                //fprintf(stderr, "Transcode RG %d to %d\n", 0, new_rg);
                if (cram_transcode_rg(in_c, out_c, c, 1, &zero, &new_rg) != 0) {
                    fprintf(stderr, "[%s] ERROR: fail to transcode RG in file '%s'.\n", __func__, fn[i]);
                    goto done;
                }
            } else {
                int32_t num_slices;

                // Not switching rg so do the usual read/write loop
                if (cram_write_container(out_c, c) != 0)
                    goto done;

                // Container compression header
                if (!(blk = cram_read_block(in_c)))
                    goto done;
                if (cram_write_block(out_c, blk) != 0) {
                    cram_free_block(blk);
                    goto done;
                }
                cram_free_block(blk);


                // Container num_blocks can be invalid, due to a bug.
                // Instead we iterate in slice context instead.
                (void)cram_container_get_landmarks(c, &num_slices);
                if (cram_copy_slice(in_c, out_c, num_slices) != 0) {
                    fprintf(stderr, "[%s] ERROR: fail to copy slices from file '%s'.\n", __func__, fn[i]);
                    goto done;
                }
            }

            cram_free_container(c);
        }

        // Reset the per-file handles so the shared cleanup below never
        // releases them a second time.
        bam_hdr_destroy(old);
        old = NULL;
        sam_close(in);
        in = NULL;
    }

    ret = 0;

 done:
    // On the error paths these still hold whatever was live at the point
    // of failure; on success they are all NULL already.
    if (c)
        cram_free_container(c);
    if (old)
        bam_hdr_destroy(old);
    if (in)
        sam_close(in);

    // Closing flushes the output, so a failure here is a write failure.
    if (out && sam_close(out) != 0)
        ret = -1;

    if (rg2id)
        hash_s2i_free(rg2id);
    bam_hdr_destroy(new_h);

    return ret;
}