/*
 * Copies a CRAM stream to stdout with 'h' substituted as the header.
 * No checks are made on the validity of the replacement header.
 *
 * in        CRAM input; containers are read from its current position.
 * h         Replacement header.  When add_PG is non-zero, h->text and
 *           h->l_text are replaced with the text including the new @PG line.
 * arg_list  Optional command line recorded as the "CL" tag of the @PG
 *           line; may be NULL.
 * add_PG    Non-zero to add a "samtools" @PG line to the header.
 *
 * Returns 0 on success, -1 on failure.
 */
int cram_reheader(cram_fd *in, bam_hdr_t *h, const char *arg_list, int add_PG)
{
    htsFile *h_out = hts_open("-", "wc");
    cram_fd *out;
    cram_container *c = NULL;
    int ret = -1;

    if (!h_out)
        return -1;
    out = h_out->fp.cram;

    // Give the output a parsed copy of the replacement header text.
    cram_fd_set_header(out, sam_hdr_parse_(h->text, h->l_text));
    if (!cram_fd_get_header(out))
        goto err;

    if (add_PG) {
        if (sam_hdr_add_PG(cram_fd_get_header(out), "samtools",
                           "VN", samtools_version(),
                           arg_list ? "CL": NULL,
                           arg_list ? arg_list : NULL,
                           NULL) != 0)
            goto err;

        // Convert back to bam_hdr_t struct
        free(h->text);
        h->text = strdup(sam_hdr_str(cram_fd_get_header(out)));
        h->l_text = sam_hdr_length(cram_fd_get_header(out));
        if (!h->text)
            goto err;
    }

    if (sam_hdr_write(h_out, h) != 0)
        goto err;
    cram_set_option(out, CRAM_OPT_REFERENCE, NULL);

    // Copy every container, and the blocks it announces, verbatim.
    while ((c = cram_read_container(in))) {
        int32_t i, num_blocks = cram_container_get_num_blocks(c);

        if (cram_write_container(out, c) != 0)
            goto err;

        for (i = 0; i < num_blocks; i++) {
            cram_block *blk = cram_read_block(in);

            if (!blk || cram_write_block(out, blk) != 0) {
                if (blk)
                    cram_free_block(blk);
                goto err;
            }
            cram_free_block(blk);
        }
        cram_free_container(c);
    }

    ret = 0;

 err:
    // 'c' is only non-NULL here when we jumped out of the copy loop.
    if (c)
        cram_free_container(c);
    if (hts_close(h_out) != 0)
        ret = -1;

    return ret;
}
int main(int argc, char *argv[]) { samFile *in; char *fn_ref = 0; int flag = 0, c, clevel = -1, ignore_sam_err = 0; char moder[8]; bam_hdr_t *h; bam1_t *b; htsFile *out; char modew[8]; int r = 0, exit_code = 0; hts_opt *in_opts = NULL, *out_opts = NULL, *last = NULL; int nreads = 0; int benchmark = 0; while ((c = getopt(argc, argv, "IbDCSl:t:i:o:N:B")) >= 0) { switch (c) { case 'S': flag |= 1; break; case 'b': flag |= 2; break; case 'D': flag |= 4; break; case 'C': flag |= 8; break; case 'B': benchmark = 1; break; case 'l': clevel = atoi(optarg); flag |= 2; break; case 't': fn_ref = optarg; break; case 'I': ignore_sam_err = 1; break; case 'i': if (add_option(&in_opts, optarg)) return 1; break; case 'o': if (add_option(&out_opts, optarg)) return 1; break; case 'N': nreads = atoi(optarg); } } if (argc == optind) { fprintf(stderr, "Usage: samview [-bSCSIB] [-N num_reads] [-l level] [-o option=value] <in.bam>|<in.sam>|<in.cram> [region]\n"); return 1; } strcpy(moder, "r"); if (flag&4) strcat(moder, "c"); else if ((flag&1) == 0) strcat(moder, "b"); in = sam_open(argv[optind], moder); if (in == NULL) { fprintf(stderr, "Error opening \"%s\"\n", argv[optind]); return EXIT_FAILURE; } h = sam_hdr_read(in); h->ignore_sam_err = ignore_sam_err; b = bam_init1(); strcpy(modew, "w"); if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel); if (flag&8) strcat(modew, "c"); else if (flag&2) strcat(modew, "b"); out = hts_open("-", modew); if (out == NULL) { fprintf(stderr, "Error opening standard output\n"); return EXIT_FAILURE; } /* CRAM output */ if (flag & 8) { int ret; // Parse input header and use for CRAM output out->fp.cram->header = sam_hdr_parse_(h->text, h->l_text); // Create CRAM references arrays if (fn_ref) ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, fn_ref); else // Attempt to fill out a cram->refs[] array from @SQ headers ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, NULL); if (ret != 0) return EXIT_FAILURE; } // Process any options; 
currently cram only. for (; in_opts; in_opts = (last=in_opts)->next, free(last)) { hts_set_opt(in, in_opts->opt, in_opts->val); if (in_opts->opt == CRAM_OPT_REFERENCE) if (hts_set_opt(out, in_opts->opt, in_opts->val) != 0) return EXIT_FAILURE; } for (; out_opts; out_opts = (last=out_opts)->next, free(last)) if (hts_set_opt(out, out_opts->opt, out_opts->val) != 0) return EXIT_FAILURE; if (!benchmark) sam_hdr_write(out, h); if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region int i; hts_idx_t *idx; if ((idx = sam_index_load(in, argv[optind])) == 0) { fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__); return 1; } for (i = optind + 1; i < argc; ++i) { hts_itr_t *iter; if ((iter = sam_itr_querys(idx, h, argv[i])) == 0) { fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]); continue; } while ((r = sam_itr_next(in, iter, b)) >= 0) { if (!benchmark && sam_write1(out, h, b) < 0) { fprintf(stderr, "Error writing output.\n"); exit_code = 1; break; } if (nreads && --nreads == 0) break; } hts_itr_destroy(iter); } hts_idx_destroy(idx); } else while ((r = sam_read1(in, h, b)) >= 0) { if (!benchmark && sam_write1(out, h, b) < 0) { fprintf(stderr, "Error writing output.\n"); exit_code = 1; break; } if (nreads && --nreads == 0) break; } if (r < -1) { fprintf(stderr, "Error parsing input.\n"); exit_code = 1; } r = sam_close(out); if (r < 0) { fprintf(stderr, "Error closing output.\n"); exit_code = 1; } bam_destroy1(b); bam_hdr_destroy(h); r = sam_close(in); if (r < 0) { fprintf(stderr, "Error closing input.\n"); exit_code = 1; } return exit_code; }
int main(int argc, char **argv) { scram_fd **in, *out; int n_input, i; bam_seq_t **s; char imode[10], *in_f = "", omode[10], *out_f = ""; int level = '\0'; // nul terminate string => auto level int c, verbose = 0; int s_opt = 0, S_opt = 0, embed_ref = 0; char *ref_fn = NULL; int start, end; char ref_name[1024] = {0}; refs_t *refs = NULL; /* Parse command line arguments */ while ((c = getopt(argc, argv, "u0123456789hvs:S:V:r:XI:O:R:")) != -1) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': level = c; break; case 'u': level = '0'; break; case 'h': usage(stdout); return 0; case 'v': verbose++; break; case 's': s_opt = atoi(optarg); break; case 'S': S_opt = atoi(optarg); break; case 'V': cram_set_option(NULL, CRAM_OPT_VERSION, optarg); break; case 'r': ref_fn = optarg; break; case 'X': embed_ref = 1; break; case 'I': in_f = parse_format(optarg); break; case 'O': out_f = parse_format(optarg); break; case 'R': { char *cp = strchr(optarg, ':'); if (cp) { *cp = 0; switch (sscanf(cp+1, "%d-%d", &start, &end)) { case 1: end = start; break; case 2: break; default: fprintf(stderr, "Malformed range format\n"); return 1; } } else { start = INT_MIN; end = INT_MAX; } strncpy(ref_name, optarg, 1023); break; } case '?': fprintf(stderr, "Unrecognised option: -%c\n", optopt); usage(stderr); return 1; } } /* Open output file */ sprintf(omode, "w%s%c", out_f, level); if (!(out = scram_open("-", omode))) { fprintf(stderr, "Failed to open bam file %s\n", argv[optind+1]); return 1; } /* Open multiple input files */ sprintf(imode, "r%s%c", in_f, level); n_input = argc - optind; if (!n_input) { fprintf(stderr, "No input files specified.\n"); return 1; } if (!(in = malloc(n_input * sizeof(*in)))) return 1; if (!(s = malloc(n_input * sizeof(*s)))) return 1; for (i = 0; i < n_input; i++, optind++) { s[i] = NULL; if (*in_f == 0) sprintf(imode, "r%s%c", detect_format(argv[optind]), level); if (!(in[i] = scram_open(argv[optind], 
imode))) { fprintf(stderr, "Failed to open bam file %s\n", argv[optind]); return 1; } if (i && !hdr_compare(scram_get_header(in[0]), scram_get_header(in[i]))) { fprintf(stderr, "Incompatible reference sequence list.\n"); fprintf(stderr, "Currently the @SQ lines need to be identical" " in all files.\n"); return 1; } if (!refs && scram_get_refs(in[i])) refs = scram_get_refs(in[i]); if (refs && scram_set_option(in[i], CRAM_OPT_SHARED_REF, refs)) return 1; } /* Set any format specific options */ if (refs) scram_set_option(out, CRAM_OPT_SHARED_REF, refs); if (scram_set_option(out, CRAM_OPT_VERBOSITY, verbose)) return 1; if (s_opt) if (scram_set_option(out, CRAM_OPT_SEQS_PER_SLICE, s_opt)) return 1; if (S_opt) if (scram_set_option(out, CRAM_OPT_SLICES_PER_CONTAINER, S_opt)) return 1; if (embed_ref) if (scram_set_option(out, CRAM_OPT_EMBED_REF, embed_ref)) return 1; /* Copy header and refs from in to out, for writing purposes */ // FIXME: do proper merging of @PG lines // FIXME: track mapping of old PG aux name to new PG aux name per seq scram_set_header(out, sam_hdr_dup(scram_get_header(in[0]))); // Needs doing after loading the header. 
if (ref_fn) if (scram_set_option(out, CRAM_OPT_REFERENCE, ref_fn)) return 1; if (scram_get_header(in[0])) { if (scram_write_header(out)) return 1; } /* Do the actual file format conversion */ fprintf(stderr, "Opening and loading initial seqs\n"); for (i = 0; i < n_input; i++) { if (scram_get_seq(in[i], &s[i]) < 0) { if (scram_close(in[i])) return 1; in[i] = NULL; free(s[i]); continue; } } fprintf(stderr, "Merging...\n"); for (;;) { int64_t best_val = INT64_MAX; int best_j = 0, j; for (j = 0; j < n_input; j++) { bam_seq_t *b = s[j]; uint64_t x; if (!in[j]) continue; x = (((uint64_t)bam_ref(b))<<33) | (bam_pos(b)<<2) | (bam_strand(b)<<1) | !(bam_flag(b) & BAM_FREAD1); if (best_val > x) { best_val = x; best_j = j; } } if (best_val == INT64_MAX) { // all closed break; } if (-1 == scram_put_seq(out, s[best_j])) return 1; if (scram_get_seq(in[best_j], &s[best_j]) < 0) { if (scram_close(in[best_j])) return 1; in[best_j] = NULL; free(s[best_j]); } } for (i = 0; i < n_input; i++) { if (!in[i]) continue; scram_close(in[i]); if (s[i]) free(s[i]); } /* Finally tidy up and close files */ if (scram_close(out)) return 1; free(in); free(s); return 0; }
int main(int argc, char **argv) { cram_fd *fd; bam_file_t *bfd; bam_seq_t *bam = NULL; char mode[4] = {'w', '\0', '\0', '\0'}; char *prefix = NULL; int decode_md = 0; int C; int start, end; char ref_name[1024] = {0}, *arg_list, *ref_fn = NULL; int embed_ref = 0; while ((C = getopt(argc, argv, "bu0123456789mp:hr:R:X")) != -1) { switch (C) { case 'b': mode[1] = 'b'; break; case 'u': mode[2] = '0'; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': mode[2] = C; break; case 'm': decode_md = 1; break; case 'p': prefix = optarg; break; case 'h': usage(stdout); return 0; case 'r': ref_fn = optarg; break; case 'X': embed_ref = 1; break; case 'R': { char *cp = strchr(optarg, ':'); if (cp) { *cp = 0; switch (sscanf(cp+1, "%d-%d", &start, &end)) { case 1: end = start; break; case 2: break; default: fprintf(stderr, "Malformed range format\n"); return 1; } } else { start = INT_MIN; end = INT_MAX; } strncpy(ref_name, optarg, 1023); break; } case '?': fprintf(stderr, "Unrecognised option: -%c\n", optopt); usage(stderr); return 1; } } if (argc - optind != 1 && argc - optind != 2) { usage(stderr); return 1; } if (argc - optind == 1) { if (NULL == (bfd = bam_open("-", mode))) { fprintf(stderr, "Failed to open SAM/BAM output\n."); return 1; } } else { if (NULL == (bfd = bam_open(argv[optind+1], mode))) { fprintf(stderr, "Failed to open SAM/BAM output\n."); perror(argv[optind+1]); return 1; } } if (NULL == (fd = cram_open(argv[optind], "rb"))) { fprintf(stderr, "Error opening CRAM file '%s'.\n", argv[optind]); return 1; } if (*ref_name != 0) cram_index_load(fd, argv[optind]); if (prefix) cram_set_option(fd, CRAM_OPT_PREFIX, prefix); if (decode_md) cram_set_option(fd, CRAM_OPT_DECODE_MD, decode_md); if (embed_ref) cram_set_option(fd, CRAM_OPT_EMBED_REF, embed_ref); /* Find and load reference */ cram_load_reference(fd, ref_fn); if (!fd->refs && !embed_ref) { fprintf(stderr, "Unable to find an appropriate reference.\n" "Please 
specify a valid reference with -r ref.fa option.\n"); return 1; } bfd->header = fd->header; if (*ref_name != 0) { cram_range r; int refid = sam_hdr_name2ref(fd->header, ref_name); if (refid == -1 && *ref_name != '*') { fprintf(stderr, "Unknown reference name '%s'\n", ref_name); return 1; } r.refid = refid; r.start = start; r.end = end; cram_set_option(fd, CRAM_OPT_RANGE, &r); } /* SAM Header */ if (!(arg_list = stringify_argv(argc, argv))) return 1; sam_hdr_add_PG(bfd->header, "cram_to_sam", "VN", PACKAGE_VERSION, "CL", arg_list, NULL); free(arg_list); bam_write_header(bfd); while (cram_get_bam_seq(fd, &bam) == 0) { bam_put_seq(bfd, bam); } if (!cram_eof(fd)) { fprintf(stderr, "Error while reading file\n"); return 1; } cram_close(fd); bfd->header = NULL; bam_close(bfd); free(bam); return 0; }
int main(int argc, char **argv) { scram_fd *in, *out; bam_seq_t *s; char imode[10], *in_f = "", omode[10], *out_f = ""; int level = '\0'; // nul terminate string => auto level int c, verbose = 0; int s_opt = 0, S_opt = 0, embed_ref = 0, ignore_md5 = 0, decode_md = 0; char *ref_fn = NULL; int start, end, multi_seq = -1, no_ref = 0; int use_bz2 = 0, use_arith = 0, use_lzma = 0; char ref_name[1024] = {0}; refs_t *refs; int nthreads = 1; t_pool *p = NULL; int max_reads = -1; enum quality_binning binning = BINNING_NONE; /* Parse command line arguments */ while ((c = getopt(argc, argv, "u0123456789hvs:S:V:r:xXeI:O:R:!MmjJZt:BN:")) != -1) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': level = c; break; case 'u': level = '0'; break; case 'h': usage(stdout); return 0; case 'v': verbose++; break; case 's': s_opt = atoi(optarg); break; case 'S': S_opt = atoi(optarg); break; case 'm': decode_md = 1; break; case 'V': if (cram_set_option(NULL, CRAM_OPT_VERSION, optarg)) return 1; break; case 'r': ref_fn = optarg; break; case 'X': fprintf(stderr, "-X is deprecated in favour of -e.\n"); case 'e': embed_ref = 1; break; case 'x': no_ref = 1; break; case 'I': in_f = parse_format(optarg); break; case 'O': out_f = parse_format(optarg); break; case 'R': { char *cp = strchr(optarg, ':'); if (cp) { *cp = 0; switch (sscanf(cp+1, "%d-%d", &start, &end)) { case 1: end = start; break; case 2: break; default: fprintf(stderr, "Malformed range format\n"); return 1; } } else { start = INT_MIN; end = INT_MAX; } strncpy(ref_name, optarg, 1023); break; } case '!': ignore_md5 = 1; break; case 'M': multi_seq = 1; break; case 'j': #ifdef HAVE_LIBBZ2 use_bz2 = 1; #else fprintf(stderr, "Warning: bzip2 support is not compiled into this" " version.\nPlease recompile.\n"); #endif break; case 'J': use_arith = 1; break; case 'Z': #ifdef HAVE_LIBLZMA use_lzma = 1; #else fprintf(stderr, "Warning: lzma support is not compiled into this" " 
version.\nPlease recompile.\n"); #endif break; case 't': nthreads = atoi(optarg); if (nthreads < 1) { fprintf(stderr, "Number of threads needs to be >= 1\n"); return 1; } break; case 'B': binning = BINNING_ILLUMINA; break; case 'N': // For debugging max_reads = atoi(optarg); break; case '?': fprintf(stderr, "Unrecognised option: -%c\n", optopt); usage(stderr); return 1; } } if (argc - optind > 2) { fprintf(stderr, "Usage: scramble [input_file [output_file]]\n"); return 1; } /* Open up input and output files */ sprintf(imode, "r%s%c", in_f, level); if (argc - optind > 0) { if (*in_f == 0) sprintf(imode, "r%s%c", detect_format(argv[optind]), level); if (!(in = scram_open(argv[optind], imode))) { fprintf(stderr, "Failed to open file %s\n", argv[optind]); return 1; } } else { if (!(in = scram_open("-", imode))) { fprintf(stderr, "Failed to open file %s\n", argv[optind]); return 1; } } if (!in->is_bam && ref_fn) { cram_load_reference(in->c, ref_fn); if (!in->c->refs && !embed_ref) { fprintf(stderr, "Unable to find an appropriate reference.\n" "Please specify a valid reference with " "-r ref.fa option.\n"); return 1; } } sprintf(omode, "w%s%c", out_f, level); if (argc - optind > 1) { if (*out_f == 0) sprintf(omode, "w%s%c", detect_format(argv[optind+1]), level); if (!(out = scram_open(argv[optind+1], omode))) { fprintf(stderr, "Failed to open file %s\n", argv[optind+1]); return 1; } } else { if (!(out = scram_open("-", omode))) { fprintf(stderr, "Failed to open file %s\n", argv[optind+1]); return 1; } } /* Set any format specific options */ scram_set_refs(out, refs = scram_get_refs(in)); scram_set_option(out, CRAM_OPT_VERBOSITY, verbose); if (s_opt) if (scram_set_option(out, CRAM_OPT_SEQS_PER_SLICE, s_opt)) return 1; if (S_opt) if (scram_set_option(out, CRAM_OPT_SLICES_PER_CONTAINER, S_opt)) return 1; if (embed_ref) if (scram_set_option(out, CRAM_OPT_EMBED_REF, embed_ref)) return 1; if (use_bz2) if (scram_set_option(out, CRAM_OPT_USE_BZIP2, use_bz2)) return 1; if 
(use_arith) if (scram_set_option(out, CRAM_OPT_USE_ARITH, use_arith)) return 1; if (use_lzma) if (scram_set_option(out, CRAM_OPT_USE_LZMA, use_lzma)) return 1; if (binning != BINNING_NONE) if (scram_set_option(out, CRAM_OPT_BINNING, binning)) return 1; if (no_ref) if (scram_set_option(out, CRAM_OPT_NO_REF, no_ref)) return 1; if (multi_seq) if (scram_set_option(out, CRAM_OPT_MULTI_SEQ_PER_SLICE, multi_seq)) return 1; if (decode_md) { if (no_ref) { fprintf(stderr, "Cannot use -m in conjunction with -x.\n"); return 1; } if (scram_set_option(in, CRAM_OPT_DECODE_MD, decode_md)) return 1; } if (nthreads > 1) { if (NULL == (p = t_pool_init(nthreads*2, nthreads))) return 1; if (scram_set_option(in, CRAM_OPT_THREAD_POOL, p)) return 1; if (scram_set_option(out, CRAM_OPT_THREAD_POOL, p)) return 1; } if (ignore_md5) if (scram_set_option(in, CRAM_OPT_IGNORE_MD5, ignore_md5)) return 1; /* Copy header and refs from in to out, for writing purposes */ scram_set_header(out, scram_get_header(in)); // Needs doing after loading the header. 
if (ref_fn) { if (scram_set_option(out, CRAM_OPT_REFERENCE, ref_fn)) return 1; } else { // Attempt to fill out a cram->refs[] array from @SQ headers scram_set_option(out, CRAM_OPT_REFERENCE, NULL); } if (scram_get_header(out)) { char *arg_list = stringify_argv(argc, argv); if (!arg_list) return 1; if (sam_hdr_add_PG(scram_get_header(out), "scramble", "VN", PACKAGE_VERSION, "CL", arg_list, NULL)) return 1; if (scram_write_header(out)) return 1; free(arg_list); } /* Support for sub-range queries, currently implemented for CRAM only */ if (*ref_name != 0) { cram_range r; int refid; if (in->is_bam) { fprintf(stderr, "Currently the -R option is only implemented for CRAM indices\n"); return 1; } cram_index_load(in->c, argv[optind]); refid = sam_hdr_name2ref(in->c->header, ref_name); if (refid == -1 && *ref_name != '*') { fprintf(stderr, "Unknown reference name '%s'\n", ref_name); return 1; } r.refid = refid; r.start = start; r.end = end; if (scram_set_option(in, CRAM_OPT_RANGE, &r)) return 1; } /* Do the actual file format conversion */ s = NULL; while (scram_get_seq(in, &s) >= 0) { if (-1 == scram_put_seq(out, s)) { fprintf(stderr, "Failed to encode sequence\n"); return 1; } if (max_reads >= 0) if (--max_reads == 0) break; } if (max_reads == -1) { switch(scram_eof(in)) { case 0: fprintf(stderr, "Failed to decode sequence\n"); return 1; case 2: fprintf(stderr, "Warning: no end-of-file block identified. " "File may be truncated.\n"); break; case 1: default: // expected case break; } } /* Finally tidy up and close files */ if (scram_close(in)) return 1; if (scram_close(out)) return 1; if (p) t_pool_destroy(p, 0); if (s) free(s); return 0; }
int main(int argc, char **argv) { cram_fd *out; bam_file_t *in; bam_seq_t *s = NULL; char *out_fn; int level = '\0'; // nul terminate string => auto level char out_mode[4]; int c, verbose = 0; int s_opt = 0, S_opt = 0, embed_ref = 0; char *arg_list, *ref_fn = NULL; while ((c = getopt(argc, argv, "u0123456789hvs:S:V:r:X")) != -1) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': level = c; break; case 'u': level = '0'; break; case 'h': usage(stdout); return 0; case 'v': verbose++; break; case 's': s_opt = atoi(optarg); break; case 'S': S_opt = atoi(optarg); break; case 'V': cram_set_option(NULL, CRAM_OPT_VERSION, optarg); break; case 'r': ref_fn = optarg; break; case 'X': embed_ref = 1; break; case '?': fprintf(stderr, "Unrecognised option: -%c\n", optopt); usage(stderr); return 1; } } if (argc - optind != 1 && argc - optind != 2) { usage(stderr); return 1; } /* opening */ if (NULL == (in = bam_open(argv[optind], "rb"))) { perror(argv[optind]); return 1; } out_fn = argc - optind == 2 ? 
argv[optind+1] : "-"; sprintf(out_mode, "wb%c", level); if (NULL == (out = cram_open(out_fn, out_mode))) { fprintf(stderr, "Error opening CRAM file '%s'.\n", out_fn); return 1; } /* SAM Header */ if (!(arg_list = stringify_argv(argc, argv))) return 1; sam_hdr_add_PG(in->header, "sam_to_cram", "VN", PACKAGE_VERSION, "CL", arg_list, NULL); free(arg_list); /* Find and load reference */ if (!ref_fn) { SAM_hdr_type *ty = sam_hdr_find(in->header, "SQ", NULL, NULL); if (ty) { SAM_hdr_tag *tag; if ((tag = sam_hdr_find_key(in->header, ty, "UR", NULL))) { ref_fn = tag->str + 3; if (strncmp(ref_fn, "file:", 5) == 0) ref_fn += 5; } } } out->header = in->header; if (ref_fn) cram_load_reference(out, ref_fn); if (!out->refs) { fprintf(stderr, "Unable to open reference.\n" "Please specify a valid reference with -r ref.fa option.\n"); return 1; } refs2id(out->refs, out->header); if (-1 == cram_write_SAM_hdr(out, in->header)) return 1; cram_set_option(out, CRAM_OPT_VERBOSITY, verbose); if (s_opt) cram_set_option(out, CRAM_OPT_SEQS_PER_SLICE, s_opt); if (S_opt) cram_set_option(out, CRAM_OPT_SLICES_PER_CONTAINER, S_opt); if (embed_ref) cram_set_option(out, CRAM_OPT_EMBED_REF, embed_ref); /* Sequence iterators */ while (bam_get_seq(in, &s) > 0) { if (-1 == cram_put_bam_seq(out, s)) { fprintf(stderr, "Failed in cram_put_bam_seq()\n"); return 1; } } bam_close(in); out->header = NULL; // freed by bam_close() if (-1 == cram_close(out)) { fprintf(stderr, "Failed in cram_close()\n"); return 1; } if (s) free(s); return 0; }
int main(int argc, char *argv[]) { samFile *in; char *fn_ref = 0; int flag = 0, c, clevel = -1, ignore_sam_err = 0; char moder[8]; bam_hdr_t *h; bam1_t *b; htsFile *out; char modew[8]; int r = 0, exit_code = 0; while ((c = getopt(argc, argv, "IbDCSl:t:")) >= 0) { switch (c) { case 'S': flag |= 1; break; case 'b': flag |= 2; break; case 'D': flag |= 4; break; case 'C': flag |= 8; break; case 'l': clevel = atoi(optarg); flag |= 2; break; case 't': fn_ref = optarg; break; case 'I': ignore_sam_err = 1; break; } } if (argc == optind) { fprintf(stderr, "Usage: samview [-bSCSI] [-l level] <in.bam>|<in.sam>|<in.cram> [region]\n"); return 1; } strcpy(moder, "r"); if (flag&4) strcat(moder, "c"); else if ((flag&1) == 0) strcat(moder, "b"); in = sam_open(argv[optind], moder); h = sam_hdr_read(in); h->ignore_sam_err = ignore_sam_err; b = bam_init1(); strcpy(modew, "w"); if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel); if (flag&8) strcat(modew, "c"); else if (flag&2) strcat(modew, "b"); out = hts_open("-", modew); /* CRAM output */ if (flag & 8) { // Parse input header and use for CRAM output out->fp.cram->header = sam_hdr_parse_(h->text, h->l_text); // Create CRAM references arrays if (fn_ref) cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, fn_ref); else // Attempt to fill out a cram->refs[] array from @SQ headers cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, NULL); } sam_hdr_write(out, h); if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region int i; hts_idx_t *idx; if ((idx = bam_index_load(argv[optind])) == 0) { fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__); return 1; } for (i = optind + 1; i < argc; ++i) { hts_itr_t *iter; if ((iter = bam_itr_querys(idx, h, argv[i])) == 0) { fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]); continue; } while ((r = bam_itr_next(in, iter, b)) >= 0) { if (sam_write1(out, h, b) < 0) { fprintf(stderr, "Error writing output.\n"); exit_code = 1; break; } } 
hts_itr_destroy(iter); } hts_idx_destroy(idx); } else while ((r = sam_read1(in, h, b)) >= 0) { if (sam_write1(out, h, b) < 0) { fprintf(stderr, "Error writing output.\n"); exit_code = 1; break; } } sam_close(out); if (r < -1) { fprintf(stderr, "Error parsing input.\n"); exit_code = 1; } bam_destroy1(b); bam_hdr_destroy(h); sam_close(in); return exit_code; }
/* * CRAM files don't store the RG:Z:ID per read in the aux field. * Instead they have a numerical data series (RG) to point each read * back to the Nth @RG line in the file. This means that we may need * to edit the RG data series (if the files were produced from * "samtools split" for example). * * The encoding method is stored in the compression header. Typical * examples: * * RG => EXTERNAL {18} # Block content-id 18 holds RG values * # as a series of ITF8 encoded values * * RG => HUFFMAN {1, 255, 255, 255, 255, 255, 1, 0} * # One RG value #-1. (No RG) * * RG => HUFFMAN {1, 0, 1, 0} # One RG value #0 (always first RG) * * RG => HUFFMAN {2, 0, 1, 2, 1, 1} * # Two RG values, #0 and #1, written * # to the CORE block and possibly * # mixed with other data series. * * A single value can (but may not be) implemented as a zero bit * huffman code. In this situation we can change the meta-data in the * compression header to renumber an RG value.. */ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram) { samFile *out; cram_fd *out_c; int i, vers_maj, vers_min; khash_s2i *rg2id = NULL; bam_hdr_t *new_h = NULL; /* Check consistent versioning and compatible headers */ if (!(new_h = cram_cat_check_hdr(nfn, fn, h, &rg2id, &vers_maj, &vers_min))) return -1; /* Open the file with cram_vers */ char vers[100]; sprintf(vers, "%d.%d", vers_maj, vers_min); out = sam_open(outcram, "wc"); if (out == 0) { fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outcram); return 1; } out_c = out->fp.cram; cram_set_option(out_c, CRAM_OPT_VERSION, vers); //fprintf(stderr, "Creating cram vers %s\n", vers); cram_fd_set_header(out_c, sam_hdr_parse_(new_h->text, new_h->l_text)); // needed? 
sam_hdr_write(out, new_h); for (i = 0; i < nfn; ++i) { samFile *in; cram_fd *in_c; cram_container *c; bam_hdr_t *old; int new_rg = -1; in = sam_open(fn[i], "rc"); if (in == 0) { fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]); return -1; } in_c = in->fp.cram; old = sam_hdr_read(in); khash_s2i *rg2id_in = hash_rg(old); // Compute RG mapping if suitable for changing. if (rg2id_in->n_id == 1) { int _; new_rg = hash_s2i_inc(rg2id, rg2id_in->id[0], NULL, &_); } else { new_rg = 0; } hash_s2i_free(rg2id_in); // Copy contains and blocks within them while ((c = cram_read_container(in_c))) { cram_block *blk; if (cram_container_is_empty(in_c)) { if (cram_write_container(out_c, c) != 0) return -1; // Container compression header if (!(blk = cram_read_block(in_c))) return -1; if (cram_write_block(out_c, blk) != 0) { cram_free_block(blk); return -1; } cram_free_block(blk); cram_free_container(c); continue; } // If we have just one RG key and new_rg != 0 then // we need to edit the compression header. IF WE CAN. if (new_rg) { int zero = 0; //fprintf(stderr, "Transcode RG %d to %d\n", 0, new_rg); cram_transcode_rg(in_c, out_c, c, 1, &zero, &new_rg); } else { int32_t num_slices; // Not switching rg so do the usual read/write loop if (cram_write_container(out_c, c) != 0) return -1; // Container compression header if (!(blk = cram_read_block(in_c))) return -1; if (cram_write_block(out_c, blk) != 0) { cram_free_block(blk); return -1; } cram_free_block(blk); // Container num_blocks can be invalid, due to a bug. // Instead we iterate in slice context instead. (void)cram_container_get_landmarks(c, &num_slices); cram_copy_slice(in_c, out_c, num_slices); } cram_free_container(c); } bam_hdr_destroy(old); sam_close(in); } sam_close(out); hash_s2i_free(rg2id); bam_hdr_destroy(new_h); return 0; }