Beispiel #1
1
Datei: sam.c Projekt: atks/vt
static void copy_check_alignment(const char *infname, const char *informat,
    const char *outfname, const char *outmode, const char *outref)
{
    samFile *in = sam_open(infname, "r");
    samFile *out = sam_open(outfname, outmode);
    bam1_t *aln = bam_init1();
    bam_hdr_t *header = NULL;
    int res;

    if (!in) {
        fail("couldn't open %s", infname);
        goto err;
    }
    if (!out) {
        fail("couldn't open %s with mode %s", outfname, outmode);
        goto err;
    }
    if (!aln) {
        fail("bam_init1() failed");
        goto err;
    }

    if (outref) {
        if (hts_set_opt(out, CRAM_OPT_REFERENCE, outref) < 0) {
            fail("setting reference %s for %s", outref, outfname);
            goto err;
        }
    }

    header = sam_hdr_read(in);
    if (!header) {
        fail("reading header from %s", infname);
        goto err;
    }
    if (sam_hdr_write(out, header) < 0) fail("writing headers to %s", outfname);

    while ((res = sam_read1(in, header, aln)) >= 0) {
        int mod4 = ((intptr_t) bam_get_cigar(aln)) % 4;
        if (mod4 != 0)
            fail("%s CIGAR not 4-byte aligned; offset is 4k+%d for \"%s\"",
                 informat, mod4, bam_get_qname(aln));

        if (sam_write1(out, header, aln) < 0) fail("writing to %s", outfname);
    }
    if (res < -1) {
        fail("failed to read alignment from %s", infname);
    }

 err:
    bam_destroy1(aln);
    bam_hdr_destroy(header);
    if (in) sam_close(in);
    if (out) sam_close(out);
}
Beispiel #2
0
static bool readgroupise(state_t* state)
{
    if (sam_hdr_write(state->output_file, state->output_header) != 0) {
        print_error_errno("addreplacerg", "[%s] Could not write header to output file", __func__);
        return false;
    }

    bam1_t* file_read = bam_init1();
    int ret;
    while ((ret = sam_read1(state->input_file, state->input_header, file_read)) >= 0) {
        state->mode_func(state, file_read);

        if (sam_write1(state->output_file, state->output_header, file_read) < 0) {
            print_error_errno("addreplacerg", "[%s] Could not write read to output file", __func__);
            bam_destroy1(file_read);
            return false;
        }
    }
    bam_destroy1(file_read);
    if (ret != -1) {
        print_error_errno("addreplacerg", "[%s] Error reading from input file", __func__);
        return false;
    } else {
        return true;
    }
}
Beispiel #3
0
int main(int argc, char **argv) {
    dlib::BamHandle in = dlib::BamHandle("bed_test.bam");
    dlib::ParsedBed bed = dlib::ParsedBed("bed_test.bed", in.header);
    bam1_t *b = bam_init1();
    size_t diffs = 0;
    void *lh3bed = bed_read("bed_test.bed");
    samFile *so = sam_open("disagreed.bam", "wb9");
    sam_hdr_write(so, in.header);
    size_t disagrees = 0, agrees = 0;
    int dbr = 0, lh3r = 0;
    while(in.read(b) != -1) {
        if(b->core.flag & (BAM_FUNMAP)) continue;
        if((dbr = bed.bam1_test(b)) != (lh3r = bed_overlap(lh3bed, in.header->target_name[b->core.tid], b->core.pos, bam_endpos(b)))) {
            LOG_EXIT("dbr: %i. lh3r: %i. Contig: %s. Position: %i. endpos; %i\n", dbr, lh3r, in.header->target_name[b->core.tid], b->core.pos, bam_endpos(b));
            if(++disagrees % 100 == 0) LOG_DEBUG("disagrees: %lu.\n", disagrees);
            sam_write1(so, in.header, b);
        } else {
            if(++agrees % 500000 == 0) LOG_DEBUG("agrees: %lu.\n", agrees);
        }
    }
    sam_close(so);
    bam_destroy1(b);
    bed_destroy(lh3bed);
    return EXIT_SUCCESS;
}
Beispiel #4
0
  void ingestHeader(std::shared_ptr<bam_hdr_t> &header) {
    auto version = bamql::version();
    std::stringstream name;
    name << "bamql-chain ";
    for (auto chains = known_chains.begin(); chains != known_chains.end();
         chains++) {
      if (chains->second == chain) {
        name << chains->first;
        break;
      }
    }

    uuid_t uuid;
    uuid_generate(uuid);
    char id_str[sizeof(uuid_t) * 2 + 1];
    uuid_unparse(uuid, id_str);

    auto copy = bamql::appendProgramToHeader(
        header.get(), name.str(), std::string(id_str), version, query);
    if (output_file) {
      sam_hdr_write(output_file.get(), copy.get());
    }
    if (next)
      next->ingestHeader(chain == 3 ? header : copy);
  }
Beispiel #5
0
/*
 * Reads a file and outputs a new CRAM file to stdout with 'h'
 * replaced as the header.  No checks are made to the validity.
 *
 * FIXME: error checking
 */
int cram_reheader(cram_fd *in, bam_hdr_t *h, const char *arg_list, int add_PG)
{
    htsFile *h_out = hts_open("-", "wc");
    cram_fd *out = h_out->fp.cram;
    cram_container *c = NULL;
    int ret = -1;

    // Attempt to fill out a cram->refs[] array from @SQ headers
    cram_fd_set_header(out, sam_hdr_parse_(h->text, h->l_text));
    if (add_PG) {
        if (sam_hdr_add_PG(cram_fd_get_header(out), "samtools",
                           "VN", samtools_version(),
                           arg_list ? "CL": NULL,
                           arg_list ? arg_list : NULL,
                           NULL) != 0)
            goto err;

        // Covert back to bam_hdr_t struct
        free(h->text);
        h->text = strdup(sam_hdr_str(cram_fd_get_header(out)));
        h->l_text = sam_hdr_length(cram_fd_get_header(out));
        if (!h->text)
            goto err;
    }

    if (sam_hdr_write(h_out, h) != 0)
        goto err;
    cram_set_option(out, CRAM_OPT_REFERENCE, NULL);

    while ((c = cram_read_container(in))) {
        int32_t i, num_blocks = cram_container_get_num_blocks(c);
        if (cram_write_container(out, c) != 0)
            goto err;

        for (i = 0; i < num_blocks; i++) {
            cram_block *blk = cram_read_block(in);
            if (!blk || cram_write_block(out, blk) != 0) {
                if (blk) cram_free_block(blk);
                goto err;
            }
            cram_free_block(blk);
        }
        cram_free_container(c);
    }

    ret = 0;

 err:
    if (hts_close(h_out) != 0)
        ret = -1;

    return ret;
}
static void write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads)
{
    size_t i;
    samFile* fp;
    fp = sam_open(fn, mode);
    if (fp == NULL) return;
    sam_hdr_write(fp, h);
    if (n_threads > 1) hts_set_threads(fp, n_threads);
    for (i = 0; i < l; ++i)
        sam_write1(fp, h, buf[i]);
    sam_close(fp);
}
Beispiel #7
0
samfile_t *samopen(const char *fn, const char *mode, const void *aux)
{
    // hts_open() is really sam_open(), except for #define games
    samFile *hts_fp = hts_open(fn, mode);
    if (hts_fp == NULL)  return NULL;

    samfile_t *fp = malloc(sizeof (samfile_t));
    if (!fp) {
        sam_close(hts_fp);
        return NULL;
    }
    fp->file = hts_fp;
    fp->x.bam = hts_fp->fp.bgzf;
    if (strchr(mode, 'r')) {
        if (aux) {
            if (hts_set_fai_filename(fp->file, aux) != 0) {
                sam_close(hts_fp);
                free(fp);
                return NULL;
            }
        }
        fp->header = sam_hdr_read(fp->file);  // samclose() will free this
        if (fp->header == NULL) {
            sam_close(hts_fp);
            free(fp);
            return NULL;
        }
        fp->is_write = 0;
        if (fp->header->n_targets == 0 && bam_verbose >= 1)
            fprintf(samtools_stderr, "[samopen] no @SQ lines in the header.\n");
    }
    else {
        enum htsExactFormat fmt = hts_get_format(fp->file)->format;
        fp->header = (bam_hdr_t *)aux;  // For writing, we won't free it
        fp->is_write = 1;
        if (!(fmt == text_format || fmt == sam) || strchr(mode, 'h')) {
            if (sam_hdr_write(fp->file, fp->header) < 0) {
                if (bam_verbose >= 1)
                    fprintf(samtools_stderr, "[samopen] Couldn't write header\n");
                sam_close(hts_fp);
                free(fp);
                return NULL;
            }
        }
    }

    return fp;
}
Beispiel #8
0
static int view_sam(hFILE *hfp, const char *filename)
{
    samFile *in = hts_hopen(hfp, filename, "r");
    if (in == NULL) return 0;
    samFile *out = dup_stdout("w");
    bam_hdr_t *hdr = sam_hdr_read(in);

    if (show_headers) sam_hdr_write(out, hdr);
    if (mode == view_all) {
        bam1_t *b = bam_init1();
        while (sam_read1(in, hdr, b) >= 0)
            sam_write1(out, hdr, b);
        bam_destroy1(b);
    }

    bam_hdr_destroy(hdr);
    hts_close(out);
    hts_close(in);
    return 1;
}
Beispiel #9
0
    void parse() {
        if (sam_hdr_write(out_file, replace_header) != 0) {
            throw new std::runtime_error("IEEEE!");
        }
        
        bam1_t* file_read = bam_init1();
        
        while (sam_read1(file_iter, file_header, file_read) >= 0) {
            if (file_read->core.tid != -1) {
                file_read->core.tid = trans[file_read->core.tid];
            }
            if (file_read->core.mtid != -1) {
                file_read->core.mtid = trans[file_read->core.mtid];
            }
            sam_write1(out_file, file_header, file_read);
        }

        // Clean up
        if (file_read) { bam_destroy1(file_read); }
    }
Beispiel #10
0
    SamWriterPrivate(const std::string& filename,
                      const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader)
        : internal::FileProducer(filename)
        , file_(nullptr)
        , header_(rawHeader)
    {
        if (!header_)
            throw std::runtime_error("null header");

        // open file
        const string& usingFilename = TempFilename();
        const string& mode = string("w");
        file_.reset(sam_open(usingFilename.c_str(), mode.c_str()));
        if (!file_)
            throw std::runtime_error("could not open file for writing");

        // write header
        const int ret = sam_hdr_write(file_.get(), header_.get());
        if (ret != 0)
            throw std::runtime_error("could not write header");
    }
Beispiel #11
0
void SamWriter::write_header() const {
  sam_hdr_write(m_out_file, m_header.m_header.get());
}
Beispiel #12
0
int main_samview(int argc, char *argv[])
{
 int index;
    for(index = 0; index < argc; index++) {
        printf("The %d is %s\n",index,argv[index]);
    }
    getchar();return 0;
    int c, is_header = 0, is_header_only = 0, ret = 0, compress_level = -1, is_count = 0;
    int is_long_help = 0, n_threads = 0;
    int64_t count = 0;
    samFile *in = 0, *out = 0, *un_out=0;
    bam_hdr_t *header = NULL;
    char out_mode[5], out_un_mode[5], *out_format = "";
    char *fn_in = 0, *fn_out = 0, *fn_list = 0, *q, *fn_un_out = 0;
    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;

    samview_settings_t settings = {
        .rghash = NULL,
        .min_mapQ = 0,
        .flag_on = 0,
        .flag_off = 0,
        .min_qlen = 0,
        .remove_B = 0,
        .subsam_seed = 0,
        .subsam_frac = -1.,
        .library = NULL,
        .bed = NULL,
    };

    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T'),
        { "threads", required_argument, NULL, '@' },
        { NULL, 0, NULL, 0 }
    };

    /* parse command-line options */
    strcpy(out_mode, "w");
    strcpy(out_un_mode, "w");
    while ((c = getopt_long(argc, argv,
                            "SbBcCt:h1Ho:O:q:f:F:ul:r:?T:R:L:s:@:m:x:U:",
                            lopts, NULL)) >= 0) {
        switch (c) {
        case 's':
            if ((settings.subsam_seed = strtol(optarg, &q, 10)) != 0) {
                srand(settings.subsam_seed);
                settings.subsam_seed = rand();
            }
            settings.subsam_frac = strtod(q, &q);
            break;
        case 'm': settings.min_qlen = atoi(optarg); break;
        case 'c': is_count = 1; break;
        case 'S': break;
        case 'b': out_format = "b"; break;
        case 'C': out_format = "c"; break;
        case 't': fn_list = strdup(optarg); break;
        case 'h': is_header = 1; break;
        case 'H': is_header_only = 1; break;
        case 'o': fn_out = strdup(optarg); break;
        case 'U': fn_un_out = strdup(optarg); break;
        case 'f': settings.flag_on |= strtol(optarg, 0, 0); break;
        case 'F': settings.flag_off |= strtol(optarg, 0, 0); break;
        case 'q': settings.min_mapQ = atoi(optarg); break;
        case 'u': compress_level = 0; break;
        case '1': compress_level = 1; break;
        case 'l': settings.library = strdup(optarg); break;
        case 'L':
            if ((settings.bed = bed_read(optarg)) == NULL) {
                print_error_errno("view", "Could not read file \"%s\"", optarg);
                ret = 1;
                goto view_end;
            }
            break;
        case 'r':
            if (add_read_group_single("view", &settings, optarg) != 0) {
                ret = 1;
                goto view_end;
            }
            break;
        case 'R':
            if (add_read_groups_file("view", &settings, optarg) != 0) {
                ret = 1;
                goto view_end;
            }
            break;
                /* REMOVED as htslib doesn't support this
        //case 'x': out_format = "x"; break;
        //case 'X': out_format = "X"; break;
                 */
        case '?': is_long_help = 1; break;
        case 'B': settings.remove_B = 1; break;
        case '@': n_threads = strtol(optarg, 0, 0); break;
        case 'x':
            {
                if (strlen(optarg) != 2) {
                    fprintf(stderr, "main_samview: Error parsing -x auxiliary tags should be exactly two characters long.\n");
                    return usage(stderr, EXIT_FAILURE, is_long_help);
                }
                settings.remove_aux = (char**)realloc(settings.remove_aux, sizeof(char*) * (++settings.remove_aux_len));
                settings.remove_aux[settings.remove_aux_len-1] = optarg;
            }
            break;

        default:
            if (parse_sam_global_opt(c, optarg, lopts, &ga) != 0)
                return usage(stderr, EXIT_FAILURE, is_long_help);
            break;
        }
    }
    if (compress_level >= 0 && !*out_format) out_format = "b";
    if (is_header_only) is_header = 1;
    // File format auto-detection first
    if (fn_out)    sam_open_mode(out_mode+1,    fn_out,    NULL);
    if (fn_un_out) sam_open_mode(out_un_mode+1, fn_un_out, NULL);
    // Overridden by manual -b, -C
    if (*out_format)
        out_mode[1] = out_un_mode[1] = *out_format;
    out_mode[2] = out_un_mode[2] = '\0';
    // out_(un_)mode now 1 or 2 bytes long, followed by nul.
    if (compress_level >= 0) {
        char tmp[2];
        tmp[0] = compress_level + '0'; tmp[1] = '\0';
        strcat(out_mode, tmp);
        strcat(out_un_mode, tmp);
    }
    if (argc == optind && isatty(STDIN_FILENO)) return usage(stdout, EXIT_SUCCESS, is_long_help); // potential memory leak...

    fn_in = (optind < argc)? argv[optind] : "-";
    // generate the fn_list if necessary
    if (fn_list == 0 && ga.reference) fn_list = samfaipath(ga.reference);
    // open file handlers
    if ((in = sam_open_format(fn_in, "r", &ga.in)) == 0) {
        print_error_errno("view", "failed to open \"%s\" for reading", fn_in);
        ret = 1;
        goto view_end;
    }

    if (fn_list) {
        if (hts_set_fai_filename(in, fn_list) != 0) {
            fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
            ret = 1;
            goto view_end;
        }
    }
    if ((header = sam_hdr_read(in)) == 0) {
        fprintf(stderr, "[main_samview] fail to read the header from \"%s\".\n", fn_in);
        ret = 1;
        goto view_end;
    }
    if (settings.rghash) { // FIXME: I do not know what "bam_header_t::n_text" is for...
        char *tmp;
        int l;
        tmp = drop_rg(header->text, settings.rghash, &l);
        free(header->text);
        header->text = tmp;
        header->l_text = l;
    }
    if (!is_count) {
        if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) {
            print_error_errno("view", "failed to open \"%s\" for writing", fn_out? fn_out : "standard output");
            ret = 1;
            goto view_end;
        }
        if (fn_list) {
            if (hts_set_fai_filename(out, fn_list) != 0) {
                fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
                ret = 1;
                goto view_end;
            }
        }
        if (*out_format || is_header ||
            out_mode[1] == 'b' || out_mode[1] == 'c' ||
            (ga.out.format != sam && ga.out.format != unknown_format))  {
            if (sam_hdr_write(out, header) != 0) {
                fprintf(stderr, "[main_samview] failed to write the SAM header\n");
                ret = 1;
                goto view_end;
            }
        }
        if (fn_un_out) {
            if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
                print_error_errno("view", "failed to open \"%s\" for writing", fn_un_out);
                ret = 1;
                goto view_end;
            }
            if (fn_list) {
                if (hts_set_fai_filename(un_out, fn_list) != 0) {
                    fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
                    ret = 1;
                    goto view_end;
                }
            }
            if (*out_format || is_header ||
                out_un_mode[1] == 'b' || out_un_mode[1] == 'c' ||
                (ga.out.format != sam && ga.out.format != unknown_format))  {
                if (sam_hdr_write(un_out, header) != 0) {
                    fprintf(stderr, "[main_samview] failed to write the SAM header\n");
                    ret = 1;
                    goto view_end;
                }
            }
        }
    }

    if (n_threads > 1) { if (out) hts_set_threads(out, n_threads); }
    if (is_header_only) goto view_end; // no need to print alignments

    if (optind + 1 >= argc) { // convert/print the entire file
        bam1_t *b = bam_init1();
        int r;
        while ((r = sam_read1(in, header, b)) >= 0) { // read one alignment from `in'
            if (!process_aln(header, b, &settings)) {
                if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
                count++;
            } else {
                if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
            }
        }
        if (r < -1) {
            fprintf(stderr, "[main_samview] truncated file.\n");
            ret = 1;
        }
        bam_destroy1(b);
    } else { // retrieve alignments in specified regions
        int i;
        bam1_t *b;
        hts_idx_t *idx = sam_index_load(in, fn_in); // load index
        if (idx == 0) { // index is unavailable
            fprintf(stderr, "[main_samview] random alignment retrieval only works for indexed BAM or CRAM files.\n");
            ret = 1;
            goto view_end;
        }
        b = bam_init1();
        for (i = optind + 1; i < argc; ++i) {
            int result;
            hts_itr_t *iter = sam_itr_querys(idx, header, argv[i]); // parse a region in the format like `chr2:100-200'
            if (iter == NULL) { // region invalid or reference name not found
                int beg, end;
                if (hts_parse_reg(argv[i], &beg, &end))
                    fprintf(stderr, "[main_samview] region \"%s\" specifies an unknown reference name. Continue anyway.\n", argv[i]);
                else
                    fprintf(stderr, "[main_samview] region \"%s\" could not be parsed. Continue anyway.\n", argv[i]);
                continue;
            }
            // fetch alignments
            while ((result = sam_itr_next(in, iter, b)) >= 0) {
                if (!process_aln(header, b, &settings)) {
                    if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
                    count++;
                } else {
                    if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
                }
            }
            hts_itr_destroy(iter);
            if (result < -1) {
                fprintf(stderr, "[main_samview] retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file\n", argv[i]);
                ret = 1;
                break;
            }
        }
        bam_destroy1(b);
        hts_idx_destroy(idx); // destroy the BAM index
    }

view_end:
    if (is_count && ret == 0)
        printf("%" PRId64 "\n", count);

    // close files, free and return
    if (in) check_sam_close("view", in, fn_in, "standard input", &ret);
    if (out) check_sam_close("view", out, fn_out, "standard output", &ret);
    if (un_out) check_sam_close("view", un_out, fn_un_out, "file", &ret);

    free(fn_list); free(fn_out); free(settings.library);  free(fn_un_out);
    sam_global_args_free(&ga);
    if ( header ) bam_hdr_destroy(header);
    if (settings.bed) bed_destroy(settings.bed);
    if (settings.rghash) {
        khint_t k;
        for (k = 0; k < kh_end(settings.rghash); ++k)
            if (kh_exist(settings.rghash, k)) free((char*)kh_key(settings.rghash, k));
        kh_destroy(rg, settings.rghash);
    }
    if (settings.remove_aux_len) {
        free(settings.remove_aux);
    }
    return ret;
}

static int usage(FILE *fp, int exit_status, int is_long_help)
{
    fprintf(fp,
"\n"
"Usage: samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]\n"
"\n"
"Options:\n"
// output options
"  -b       output BAM\n"
"  -C       output CRAM (requires -T)\n"
"  -1       use fast BAM compression (implies -b)\n"
"  -u       uncompressed BAM output (implies -b)\n"
"  -h       include header in SAM output\n"
"  -H       print SAM header only (no alignments)\n"
"  -c       print only the count of matching records\n"
"  -o FILE  output file name [stdout]\n"
"  -U FILE  output reads not selected by filters to FILE [null]\n"
// extra input
"  -t FILE  FILE listing reference names and lengths (see long help) [null]\n"
// read filters
"  -L FILE  only include reads overlapping this BED FILE [null]\n"
"  -r STR   only include reads in read group STR [null]\n"
"  -R FILE  only include reads with read group listed in FILE [null]\n"
"  -q INT   only include reads with mapping quality >= INT [0]\n"
"  -l STR   only include reads in library STR [null]\n"
"  -m INT   only include reads with number of CIGAR operations consuming\n"
"           query sequence >= INT [0]\n"
"  -f INT   only include reads with all bits set in INT set in FLAG [0]\n"
"  -F INT   only include reads with none of the bits set in INT set in FLAG [0]\n"
// read processing
"  -x STR   read tag to strip (repeatable) [null]\n"
"  -B       collapse the backward CIGAR operation\n"
"  -s FLOAT integer part sets seed of random number generator [0];\n"
"           rest sets fraction of templates to subsample [no subsampling]\n"
// general options
"  -@, --threads INT\n"
"           number of BAM/CRAM compression threads [0]\n"
"  -?       print long help, including note about region specification\n"
"  -S       ignored (input format is auto-detected)\n");

    sam_global_opt_help(fp, "-.O.T");
    fprintf(fp, "\n");

    if (is_long_help)
        fprintf(fp,
"Notes:\n"
"\n"
"1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
"   Further control over the CRAM format can be specified by using the\n"
"   --output-fmt-option, e.g. to specify the number of sequences per slice\n"
"   and to use avoid reference based compression:\n"
"\n"
"\tsamtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
"\t   --output-fmt-option no_ref -o out.cram in.bam\n"
"\n"
"   Options can also be specified as a comma separated list within the\n"
"   --output-fmt value too.  For example this is equivalent to the above\n"
"\n"
"\tsamtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
"\t   -o out.cram in.bam\n"
"\n"
"2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
"   two fields of each line consisting of the reference name and the\n"
"   corresponding sequence length. The `.fai' file generated by \n"
"   `samtools faidx' is suitable for use as this file. This may be an\n"
"   empty file if reads are unaligned.\n"
"\n"
"3. SAM->BAM conversion:  samtools view -bT ref.fa in.sam.gz\n"
"\n"
"4. BAM->SAM conversion:  samtools view -h in.bam\n"
"\n"
"5. A region should be presented in one of the following formats:\n"
"   `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
"   specified, the input alignment file must be a sorted and indexed\n"
"   alignment (BAM/CRAM) file.\n"
"\n"
"6. Option `-u' is preferred over `-b' when the output is piped to\n"
"   another samtools command.\n"
"\n");

    return exit_status;
}
Beispiel #13
0
static bool split(state_t* state)
{
    if (state->unaccounted_file && sam_hdr_write(state->unaccounted_file, state->unaccounted_header) != 0) {
        fprintf(pysamerr, "Could not write output file header\n");
        return false;
    }
    size_t i;
    for (i = 0; i < state->output_count; i++) {
        if (sam_hdr_write(state->rg_output_file[i], state->rg_output_header[i]) != 0) {
            fprintf(pysamerr, "Could not write output file header\n");
            return false;
        }
    }

    bam1_t* file_read = bam_init1();
    // Read the first record
    if (sam_read1(state->merged_input_file, state->merged_input_header, file_read) < 0) {
        // Nothing more to read?  Ignore this file
        bam_destroy1(file_read);
        file_read = NULL;
    }

    while (file_read != NULL) {
        // Get RG tag from read and look it up in hash to find file to output it to
        uint8_t* tag = bam_aux_get(file_read, "RG");
        khiter_t iter;
        if ( tag != NULL ) {
            char* rg = bam_aux2Z(tag);
            iter = kh_get_c2i(state->rg_hash, rg);
        } else {
            iter = kh_end(state->rg_hash);
        }

        // Write the read out to correct file
        if (iter != kh_end(state->rg_hash)) {
            // if found write to the appropriate untangled bam
            int i = kh_val(state->rg_hash,iter);
            sam_write1(state->rg_output_file[i], state->rg_output_header[i], file_read);
        } else {
            // otherwise write to the unaccounted bam if there is one or fail
            if (state->unaccounted_file == NULL) {
                if (tag) {
                    fprintf(pysamerr, "Read \"%s\" with unaccounted for tag \"%s\".\n", bam_get_qname(file_read), bam_aux2Z(tag));
                } else {
                    fprintf(pysamerr, "Read \"%s\" has no RG tag.\n", bam_get_qname(file_read));
                }
                bam_destroy1(file_read);
                return false;
            } else {
                sam_write1(state->unaccounted_file, state->unaccounted_header, file_read);
            }
        }

        // Replace written read with the next one to process
        if (sam_read1(state->merged_input_file, state->merged_input_header, file_read) < 0) {
            // Nothing more to read?  Ignore this file in future
            bam_destroy1(file_read);
            file_read = NULL;
        }
    }

    return true;
}
Beispiel #14
0
int main(int argc, char *argv[])
{
	samFile *in;
	char *fn_ref = 0;
	int flag = 0, c, clevel = -1, ignore_sam_err = 0;
	char moder[8];
	bam_hdr_t *h;
	bam1_t *b;
	htsFile *out;
	char modew[8];
	int r = 0, exit_code = 0;

	while ((c = getopt(argc, argv, "IbDCSl:t:")) >= 0) {
		switch (c) {
		case 'S': flag |= 1; break;
		case 'b': flag |= 2; break;
		case 'D': flag |= 4; break;
		case 'C': flag |= 8; break;
		case 'l': clevel = atoi(optarg); flag |= 2; break;
		case 't': fn_ref = optarg; break;
		case 'I': ignore_sam_err = 1; break;
		}
	}
	if (argc == optind) {
		fprintf(stderr, "Usage: samview [-bSCSI] [-l level] <in.bam>|<in.sam>|<in.cram> [region]\n");
		return 1;
	}
	strcpy(moder, "r");
	if (flag&4) strcat(moder, "c");
	else if ((flag&1) == 0) strcat(moder, "b");

	in = sam_open(argv[optind], moder);
	h = sam_hdr_read(in);
	h->ignore_sam_err = ignore_sam_err;
	b = bam_init1();

	strcpy(modew, "w");
	if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
	if (flag&8) strcat(modew, "c");
	else if (flag&2) strcat(modew, "b");
	out = hts_open("-", modew);

	/* CRAM output */
	if (flag & 8) {
	    // Parse input header and use for CRAM output
	    out->fp.cram->header = sam_hdr_parse_(h->text, h->l_text);

	    // Create CRAM references arrays
	    if (fn_ref)
		cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, fn_ref);
	    else
		// Attempt to fill out a cram->refs[] array from @SQ headers
		cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, NULL);
	}

	sam_hdr_write(out, h);
	if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region
	    int i;
	    hts_idx_t *idx;
	    if ((idx = bam_index_load(argv[optind])) == 0) {
		fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
		return 1;
	    }
	    for (i = optind + 1; i < argc; ++i) {
		hts_itr_t *iter;
		if ((iter = bam_itr_querys(idx, h, argv[i])) == 0) {
		    fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]);
		    continue;
		}
		while ((r = bam_itr_next(in, iter, b)) >= 0) {
		    if (sam_write1(out, h, b) < 0) {
			fprintf(stderr, "Error writing output.\n");
			exit_code = 1;
			break;
		    }
		}
		hts_itr_destroy(iter);
	    }
	    hts_idx_destroy(idx);
	} else while ((r = sam_read1(in, h, b)) >= 0) {
		if (sam_write1(out, h, b) < 0) {
			fprintf(stderr, "Error writing output.\n");
			exit_code = 1;
			break;
		}
	}
	sam_close(out);

	if (r < -1) {
	    fprintf(stderr, "Error parsing input.\n");
	    exit_code = 1;
	}

	bam_destroy1(b);
	bam_hdr_destroy(h);
	sam_close(in);
	return exit_code;
}
Beispiel #15
0
int main(int argc, char *argv[])
{
    samFile *in;
    char *fn_ref = 0;
    int flag = 0, c, clevel = -1, ignore_sam_err = 0;
    char moder[8];
    bam_hdr_t *h;
    bam1_t *b;
    htsFile *out;
    char modew[8];
    int r = 0, exit_code = 0;
    hts_opt *in_opts = NULL, *out_opts = NULL, *last = NULL;
    int nreads = 0;
    int benchmark = 0;

    while ((c = getopt(argc, argv, "IbDCSl:t:i:o:N:B")) >= 0) {
        switch (c) {
        case 'S': flag |= 1; break;
        case 'b': flag |= 2; break;
        case 'D': flag |= 4; break;
        case 'C': flag |= 8; break;
        case 'B': benchmark = 1; break;
        case 'l': clevel = atoi(optarg); flag |= 2; break;
        case 't': fn_ref = optarg; break;
        case 'I': ignore_sam_err = 1; break;
        case 'i': if (add_option(&in_opts,  optarg)) return 1; break;
        case 'o': if (add_option(&out_opts, optarg)) return 1; break;
        case 'N': nreads = atoi(optarg);
        }
    }
    if (argc == optind) {
        fprintf(stderr, "Usage: samview [-bSCSIB] [-N num_reads] [-l level] [-o option=value] <in.bam>|<in.sam>|<in.cram> [region]\n");
        return 1;
    }
    strcpy(moder, "r");
    if (flag&4) strcat(moder, "c");
    else if ((flag&1) == 0) strcat(moder, "b");

    in = sam_open(argv[optind], moder);
    if (in == NULL) {
        fprintf(stderr, "Error opening \"%s\"\n", argv[optind]);
        return EXIT_FAILURE;
    }
    h = sam_hdr_read(in);
    h->ignore_sam_err = ignore_sam_err;
    b = bam_init1();

    strcpy(modew, "w");
    if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
    if (flag&8) strcat(modew, "c");
    else if (flag&2) strcat(modew, "b");
    out = hts_open("-", modew);
    if (out == NULL) {
        fprintf(stderr, "Error opening standard output\n");
        return EXIT_FAILURE;
    }

    /* CRAM output */
    if (flag & 8) {
        int ret;

        // Parse input header and use for CRAM output
        out->fp.cram->header = sam_hdr_parse_(h->text, h->l_text);

        // Create CRAM references arrays
        if (fn_ref)
            ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, fn_ref);
        else
            // Attempt to fill out a cram->refs[] array from @SQ headers
            ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, NULL);

        if (ret != 0)
            return EXIT_FAILURE;
    }

    // Process any options; currently cram only.
    for (; in_opts;  in_opts = (last=in_opts)->next, free(last)) {
        hts_set_opt(in,  in_opts->opt,  in_opts->val);
        if (in_opts->opt == CRAM_OPT_REFERENCE)
            if (hts_set_opt(out,  in_opts->opt,  in_opts->val) != 0)
                return EXIT_FAILURE;
    }
    for (; out_opts;  out_opts = (last=out_opts)->next, free(last))
        if (hts_set_opt(out, out_opts->opt,  out_opts->val) != 0)
            return EXIT_FAILURE;

    if (!benchmark)
        sam_hdr_write(out, h);
    if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region
        int i;
        hts_idx_t *idx;
        if ((idx = sam_index_load(in, argv[optind])) == 0) {
            fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
            return 1;
        }
        for (i = optind + 1; i < argc; ++i) {
            hts_itr_t *iter;
            if ((iter = sam_itr_querys(idx, h, argv[i])) == 0) {
                fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]);
                continue;
            }
            while ((r = sam_itr_next(in, iter, b)) >= 0) {
                if (!benchmark && sam_write1(out, h, b) < 0) {
                    fprintf(stderr, "Error writing output.\n");
                    exit_code = 1;
                    break;
                }
                if (nreads && --nreads == 0)
                    break;
            }
            hts_itr_destroy(iter);
        }
        hts_idx_destroy(idx);
    } else while ((r = sam_read1(in, h, b)) >= 0) {
        if (!benchmark && sam_write1(out, h, b) < 0) {
            fprintf(stderr, "Error writing output.\n");
            exit_code = 1;
            break;
        }
        if (nreads && --nreads == 0)
            break;
    }

    if (r < -1) {
        fprintf(stderr, "Error parsing input.\n");
        exit_code = 1;
    }

    r = sam_close(out);
    if (r < 0) {
        fprintf(stderr, "Error closing output.\n");
        exit_code = 1;
    }

    bam_destroy1(b);
    bam_hdr_destroy(h);

    r = sam_close(in);
    if (r < 0) {
        fprintf(stderr, "Error closing input.\n");
        exit_code = 1;
    }

    return exit_code;
}
Beispiel #16
0
int bam_fillmd(int argc, char *argv[])
{
    int c, flt_flag, tid = -2, ret, len, is_bam_out, is_uncompressed, max_nm, is_realn, capQ, baq_flag;
    samFile *fp = NULL, *fpout = NULL;
    bam_hdr_t *header = NULL;
    faidx_t *fai = NULL;
    char *ref = NULL, mode_w[8], *ref_file;
    bam1_t *b = NULL;
    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;

    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
        { NULL, 0, NULL, 0 }
    };

    flt_flag = UPDATE_NM | UPDATE_MD;
    is_bam_out = is_uncompressed = is_realn = max_nm = capQ = baq_flag = 0;
    strcpy(mode_w, "w");
    while ((c = getopt_long(argc, argv, "EqreuNhbSC:n:Ad", lopts, NULL)) >= 0) {
        switch (c) {
        case 'r':
            is_realn = 1;
            break;
        case 'e':
            flt_flag |= USE_EQUAL;
            break;
        case 'd':
            flt_flag |= DROP_TAG;
            break;
        case 'q':
            flt_flag |= BIN_QUAL;
            break;
        case 'h':
            flt_flag |= HASH_QNM;
            break;
        case 'N':
            flt_flag &= ~(UPDATE_MD|UPDATE_NM);
            break;
        case 'b':
            is_bam_out = 1;
            break;
        case 'u':
            is_uncompressed = is_bam_out = 1;
            break;
        case 'S':
            break;
        case 'n':
            max_nm = atoi(optarg);
            break;
        case 'C':
            capQ = atoi(optarg);
            break;
        case 'A':
            baq_flag |= 1;
            break;
        case 'E':
            baq_flag |= 2;
            break;
        default:
            if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
            fprintf(stderr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
        /* else fall-through */
        case '?':
            return calmd_usage();
        }
    }
    if (is_bam_out) strcat(mode_w, "b");
    else strcat(mode_w, "h");
    if (is_uncompressed) strcat(mode_w, "0");
    if (optind + (ga.reference == NULL) >= argc)
        return calmd_usage();
    fp = sam_open_format(argv[optind], "r", &ga.in);
    if (fp == NULL) {
        print_error_errno("calmd", "Failed to open input file '%s'", argv[optind]);
        return 1;
    }

    header = sam_hdr_read(fp);
    if (header == NULL || header->n_targets == 0) {
        fprintf(stderr, "[bam_fillmd] input SAM does not have header. Abort!\n");
        goto fail;
    }

    fpout = sam_open_format("-", mode_w, &ga.out);
    if (fpout == NULL) {
        print_error_errno("calmd", "Failed to open output");
        goto fail;
    }
    if (sam_hdr_write(fpout, header) < 0) {
        print_error_errno("calmd", "Failed to write sam header");
        goto fail;
    }

    ref_file = argc > optind + 1 ? argv[optind+1] : ga.reference;
    fai = fai_load(ref_file);

    if (!fai) {
        print_error_errno("calmd", "Failed to open reference file '%s'", ref_file);
        goto fail;
    }

    b = bam_init1();
    if (!b) {
        fprintf(stderr, "[bam_fillmd] Failed to allocate bam struct\n");
        goto fail;
    }
    while ((ret = sam_read1(fp, header, b)) >= 0) {
        if (b->core.tid >= 0) {
            if (tid != b->core.tid) {
                free(ref);
                ref = fai_fetch(fai, header->target_name[b->core.tid], &len);
                tid = b->core.tid;
                if (ref == 0) { // FIXME: Should this always be fatal?
                    fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
                            header->target_name[tid]);
                    if (is_realn || capQ > 10) goto fail; // Would otherwise crash
                }
            }
            if (is_realn) sam_prob_realn(b, ref, len, baq_flag);
            if (capQ > 10) {
                int q = sam_cap_mapq(b, ref, len, capQ);
                if (b->core.qual > q) b->core.qual = q;
            }
            if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm);
        }
        if (sam_write1(fpout, header, b) < 0) {
            print_error_errno("calmd", "failed to write to output file");
            goto fail;
        }
    }
    if (ret < -1) {
        fprintf(stderr, "[bam_fillmd] Error reading input.\n");
        goto fail;
    }
    bam_destroy1(b);
    bam_hdr_destroy(header);

    free(ref);
    fai_destroy(fai);
    sam_close(fp);
    if (sam_close(fpout) < 0) {
        fprintf(stderr, "[bam_fillmd] error when closing output file\n");
        return 1;
    }
    return 0;

fail:
    free(ref);
    if (b) bam_destroy1(b);
    if (header) bam_hdr_destroy(header);
    if (fai) fai_destroy(fai);
    if (fp) sam_close(fp);
    if (fpout) sam_close(fpout);
    return 1;
}
Beispiel #17
0
int main_pad2unpad(int argc, char *argv[])
{
    samFile *in = 0, *out = 0;
    bam_hdr_t *h = 0, *h_fix = 0;
    faidx_t *fai = 0;
    int c, compress_level = -1, is_long_help = 0;
    char in_mode[5], out_mode[6], *fn_out = 0, *fn_list = 0;
    int ret=0;
    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;

    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 'T'),
        { NULL, 0, NULL, 0 }
    };

    /* parse command-line options */
    strcpy(in_mode, "r"); strcpy(out_mode, "w");
    while ((c = getopt_long(argc, argv, "SCso:u1T:?", lopts, NULL)) >= 0) {
        switch (c) {
        case 'S': break;
        case 'C': hts_parse_format(&ga.out, "cram"); break;
        case 's': assert(compress_level == -1); hts_parse_format(&ga.out, "sam"); break;
        case 'o': fn_out = strdup(optarg); break;
        case 'u':
            compress_level = 0;
            if (ga.out.format == unknown_format)
                hts_parse_format(&ga.out, "bam");
            break;
        case '1':
            compress_level = 1;
            if (ga.out.format == unknown_format)
                hts_parse_format(&ga.out, "bam");
            break;
        case '?': is_long_help = 1; break;
        default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
            fprintf(stderr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
            return usage(is_long_help);
        }
    }
    if (argc == optind) return usage(is_long_help);

    strcat(out_mode, "h");
    if (compress_level >= 0) {
        char tmp[2];
        tmp[0] = compress_level + '0'; tmp[1] = '\0';
        strcat(out_mode, tmp);
    }

    // Load FASTA reference (also needed for SAM -> BAM if missing header)
    if (ga.reference) {
        fn_list = samfaipath(ga.reference);
        fai = fai_load(ga.reference);
    }
    // open file handlers
    if ((in = sam_open_format(argv[optind], in_mode, &ga.in)) == 0) {
        fprintf(stderr, "[depad] failed to open \"%s\" for reading.\n", argv[optind]);
        ret = 1;
        goto depad_end;
    }
    if (fn_list && hts_set_fai_filename(in, fn_list) != 0) {
        fprintf(stderr, "[depad] failed to load reference file \"%s\".\n", fn_list);
        ret = 1;
        goto depad_end;
    }
    if ((h = sam_hdr_read(in)) == 0) {
        fprintf(stderr, "[depad] failed to read the header from \"%s\".\n", argv[optind]);
        ret = 1;
        goto depad_end;
    }
    if (fai) {
        h_fix = fix_header(h, fai);
    } else {
        fprintf(stderr, "[depad] Warning - reference lengths will not be corrected without FASTA reference\n");
        h_fix = h;
    }
    char wmode[2];
    strcat(out_mode, sam_open_mode(wmode, fn_out, NULL)==0 ? wmode : "b");
    if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) {
        fprintf(stderr, "[depad] failed to open \"%s\" for writing.\n", fn_out? fn_out : "standard output");
        ret = 1;
        goto depad_end;
    }

    // Reference-based CRAM won't work unless we also create a new reference.
    // We could embed this, but for now we take the easy option.
    if (ga.out.format == cram)
        hts_set_opt(out, CRAM_OPT_NO_REF, 1);

    if (sam_hdr_write(out, h_fix) != 0) {
        fprintf(stderr, "[depad] failed to write header.\n");
        ret = 1;
        goto depad_end;
    }

    // Do the depad
    ret = bam_pad2unpad(in, out, h, fai);

depad_end:
    // close files, free and return
    if (fai) fai_destroy(fai);
    if (h) bam_hdr_destroy(h);
    sam_close(in);
    sam_close(out);
    free(fn_list); free(fn_out);
    return ret;
}
Beispiel #18
0
/*
 * CRAM files don't store the RG:Z:ID per read in the aux field.
 * Instead they have a numerical data series (RG) to point each read
 * back to the Nth @RG line in the file.  This means that we may need
 * to edit the RG data series (if the files were produced from
 * "samtools split" for example).
 *
 * The encoding method is stored in the compression header. Typical
 * examples:
 *
 * RG => EXTERNAL {18}           # Block content-id 18 holds RG values
 *                               # as a series of ITF8 encoded values
 *
 * RG => HUFFMAN {1, 255, 255, 255, 255, 255, 1, 0}
 *                               # One RG value #-1.  (No RG)
 *
 * RG => HUFFMAN {1, 0, 1, 0}    # One RG value #0 (always first RG)
 *
 * RG => HUFFMAN {2, 0, 1, 2, 1, 1}
 *                               # Two RG values, #0 and #1, written
 *                               # to the CORE block and possibly
 *                               # mixed with other data series.
 *
 * A single value can (but may not be) implemented as a zero bit
 * huffman code.  In this situation we can change the meta-data in the
 * compression header to renumber an RG value..
 */
int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
{
    samFile *out;
    cram_fd *out_c;
    int i, vers_maj, vers_min;
    khash_s2i *rg2id = NULL;
    bam_hdr_t *new_h = NULL;

    /* Check consistent versioning and compatible headers */
    if (!(new_h = cram_cat_check_hdr(nfn, fn, h, &rg2id, &vers_maj, &vers_min)))
        return -1;

    /* Open the file with cram_vers */
    char vers[100];
    sprintf(vers, "%d.%d", vers_maj, vers_min);
    out = sam_open(outcram, "wc");
    if (out == 0) {
        fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outcram);
        return 1;
    }
    out_c = out->fp.cram;
    cram_set_option(out_c, CRAM_OPT_VERSION, vers);
    //fprintf(stderr, "Creating cram vers %s\n", vers);

    cram_fd_set_header(out_c, sam_hdr_parse_(new_h->text,  new_h->l_text)); // needed?
    sam_hdr_write(out, new_h);

    for (i = 0; i < nfn; ++i) {
        samFile *in;
        cram_fd *in_c;
        cram_container *c;
        bam_hdr_t *old;
        int new_rg = -1;

        in = sam_open(fn[i], "rc");
        if (in == 0) {
            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
            return -1;
        }
        in_c = in->fp.cram;

        old = sam_hdr_read(in);
        khash_s2i *rg2id_in = hash_rg(old);

        // Compute RG mapping if suitable for changing.
        if (rg2id_in->n_id == 1) {
            int _;
            new_rg = hash_s2i_inc(rg2id, rg2id_in->id[0], NULL, &_);
        } else {
            new_rg = 0;
        }

        hash_s2i_free(rg2id_in);


        // Copy contains and blocks within them
        while ((c = cram_read_container(in_c))) {
            cram_block *blk;

           if (cram_container_is_empty(in_c)) {
                if (cram_write_container(out_c, c) != 0)
                    return -1;

                // Container compression header
                if (!(blk = cram_read_block(in_c)))
                    return -1;
                if (cram_write_block(out_c, blk) != 0) {
                    cram_free_block(blk);
                    return -1;
                }
                cram_free_block(blk);
                cram_free_container(c);

                continue;
            }

            // If we have just one RG key and new_rg != 0 then
            // we need to edit the compression header. IF WE CAN.
            if (new_rg) {
                int zero = 0;
                //fprintf(stderr, "Transcode RG %d to %d\n", 0, new_rg);
                cram_transcode_rg(in_c, out_c, c, 1, &zero, &new_rg);
            } else {
                int32_t num_slices;

                // Not switching rg so do the usual read/write loop
                if (cram_write_container(out_c, c) != 0)
                    return -1;

                // Container compression header
                if (!(blk = cram_read_block(in_c)))
                    return -1;
                if (cram_write_block(out_c, blk) != 0) {
                    cram_free_block(blk);
                    return -1;
                }
                cram_free_block(blk);


                // Container num_blocks can be invalid, due to a bug.
                // Instead we iterate in slice context instead.
                (void)cram_container_get_landmarks(c, &num_slices);
                cram_copy_slice(in_c, out_c, num_slices);
            }

            cram_free_container(c);
        }

        bam_hdr_destroy(old);
        sam_close(in);
    }
    sam_close(out);

    hash_s2i_free(rg2id);
    bam_hdr_destroy(new_h);

    return 0;
}
Beispiel #19
0
int main(int argc, char **argv)
{
    if (argc < 4)
        errx(1,
             "usage\t:%s <bam> <split out> <discord out> (optional #threads)",
             argv[0]);

    char *bam_file_name = argv[1];
    char *split_file_name = argv[2];
    char *disc_file_name = argv[3];
    int threads = 2;
    if (argc == 5) {
        threads = atoi(argv[4]);
    }

    samFile *disc = sam_open(disc_file_name, "wb");

    samFile *split = sam_open(split_file_name, "wb");

    samFile *in = sam_open(bam_file_name, "rb");
    if(in == NULL)
        errx(1, "Unable to open BAM/SAM file.");

    // TODO: handle cram.
    if (threads > 1) {
        bgzf_mt(in->fp.bgzf, threads, 256);
    }

    hts_idx_t *idx = sam_index_load(in, bam_file_name);
    if(idx == NULL)
        errx(1,"Unable to open BAM/SAM index.");

    bam_hdr_t *hdr = sam_hdr_read(in);

    int r = sam_hdr_write(disc, hdr);
    r = sam_hdr_write(split, hdr);

    bam1_t *aln = bam_init1();
    int ret;

    while(ret = sam_read1(in, hdr, aln) >= 0) {
        if (((aln->core.flag) & 1294) == 0)
            r = sam_write1(disc, hdr, aln);

        uint8_t *sa = bam_aux_get(aln, "SA");

        if (sa != 0) {
            char *sa_tag = strdup(bam_aux2Z(sa));
            if ( count_tags(sa_tag) == 1) {
                char *chrm, strand, *cigar;
                uint32_t pos;
                split_sa_tag(sa_tag,
                             &chrm,
                             &pos,
                             &strand,
                             &cigar);

                struct line sa, al;

                calcOffsets(cigar,
                            pos,
                            strand,
                            &sa);
                sa.chrm = chrm;
                sa.strand = strand;


                calcAlnOffsets(bam_get_cigar(aln),
                               aln->core.n_cigar,
                               aln->core.pos,
                               bam_is_rev(aln) ? '-' : '+',
                               &al);
                al.chrm = hdr->target_name[aln->core.tid];
                al.strand = bam_is_rev(aln) ? '-' : '+';

                struct line *left = &al, *right = &sa;

                if (left->SQO > right->SQO) {
                    left = &sa;
                    right = &al;
                }

                int overlap = MAX(1 + MIN(left->EQO, right->EQO) - 
                        MAX(left->SQO, right->SQO), 0);
                int alen1 = 1 + left->EQO - left->SQO;
                int alen2 = 1 + right->EQO - right->SQO;
                int mno = MIN(alen1-overlap, alen2-overlap);
                if (mno < MIN_NON_OVERLAP) 
                    continue;

                if ( (strcmp(left->chrm, right->chrm) == 0) &&
                     (left->strand == right->strand) ) {

                    int leftDiag, rightDiag, insSize;
                    if (left->strand == '-') {
                        leftDiag = left->rapos - left->sclip;
                        rightDiag = (right->rapos + right->raLen) - 
                                (right->sclip + right->qaLen);
                        insSize = rightDiag - leftDiag;
                    } else {
                        leftDiag = (left->rapos + left->raLen) - 
                                (left->sclip + left->qaLen);
                        rightDiag = right->rapos - right->sclip;
                        insSize = leftDiag - rightDiag;
                    }
                    int desert = right->SQO - left->EQO - 1;
                    if ((abs(insSize) < MIN_INDEL_SIZE) || 
                        ((desert > 0) && (
                            (desert - (int)MAX(0, insSize)) >
                            MAX_UNMAPPED_BASES)))
                        continue;
                }

                char *qname =  bam_get_qname(aln);
                if ((aln->core.flag & 64) == 64)
                    qname[0] = 'A'; 
                else
                    qname[0] = 'B'; 

                r = sam_write1(split, hdr, aln);
            }
            free(sa_tag);
        }
    }

    bam_destroy1(aln);
    hts_idx_destroy(idx);
    bam_hdr_destroy(hdr);
    sam_close(in);
    sam_close(disc);
    sam_close(split);
    if(ret < -1) {
        errx(1, "lumpy_filter: error reading bam: %s\n", bam_file_name);
    }
}
Beispiel #20
0
// currently, this function ONLY works if each read has one hit
static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int proper_pair_check, int add_ct)
{
    bam_hdr_t *header;
    bam1_t *b[2];
    int curr, has_prev, pre_end = 0, cur_end = 0;
    kstring_t str;

    str.l = str.m = 0; str.s = 0;
    header = sam_hdr_read(in);
    if (header == NULL) {
        fprintf(stderr, "[bam_mating_core] ERROR: Couldn't read header\n");
        exit(1);
    }
    // Accept unknown, unsorted, or queryname sort order, but error on coordinate sorted.
    if ((header->l_text > 3) && (strncmp(header->text, "@HD", 3) == 0)) {
        char *p, *q;
        p = strstr(header->text, "\tSO:coordinate");
        q = strchr(header->text, '\n');
        // Looking for SO:coordinate within the @HD line only
        // (e.g. must ignore in a @CO comment line later in header)
        if ((p != 0) && (p < q)) {
            fprintf(stderr, "[bam_mating_core] ERROR: Coordinate sorted, require grouped/sorted by queryname.\n");
            exit(1);
        }
    }
    sam_hdr_write(out, header);

    b[0] = bam_init1();
    b[1] = bam_init1();
    curr = 0; has_prev = 0;
    while (sam_read1(in, header, b[curr]) >= 0) {
        bam1_t *cur = b[curr], *pre = b[1-curr];
        if (cur->core.flag & BAM_FSECONDARY)
        {
            if ( !remove_reads ) sam_write1(out, header, cur);
            continue; // skip secondary alignments
        }
        if (cur->core.flag & BAM_FSUPPLEMENTARY)
        {
            sam_write1(out, header, cur);
            continue; // pass supplementary alignments through unchanged (TODO:make them match read they came from)
        }
        if (cur->core.tid < 0 || cur->core.pos < 0) // If unmapped set the flag
        {
            cur->core.flag |= BAM_FUNMAP;
        }
        if ((cur->core.flag&BAM_FUNMAP) == 0) // If mapped calculate end
        {
            cur_end = bam_endpos(cur);

            // Check cur_end isn't past the end of the contig we're on, if it is set the UNMAP'd flag
            if (cur_end > (int)header->target_len[cur->core.tid]) cur->core.flag |= BAM_FUNMAP;
        }
        if (has_prev) { // do we have a pair of reads to examine?
            if (strcmp(bam_get_qname(cur), bam_get_qname(pre)) == 0) { // identical pair name
                pre->core.flag |= BAM_FPAIRED;
                cur->core.flag |= BAM_FPAIRED;
                sync_mate(pre, cur);

                if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
                    && !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // if safe set TLEN/ISIZE
                {
                    uint32_t cur5, pre5;
                    cur5 = (cur->core.flag&BAM_FREVERSE)? cur_end : cur->core.pos;
                    pre5 = (pre->core.flag&BAM_FREVERSE)? pre_end : pre->core.pos;
                    cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5;
                } else cur->core.isize = pre->core.isize = 0;
                if (add_ct) bam_template_cigar(pre, cur, &str);
                // TODO: Add code to properly check if read is in a proper pair based on ISIZE distribution
                if (proper_pair_check && !plausibly_properly_paired(pre,cur)) {
                    pre->core.flag &= ~BAM_FPROPER_PAIR;
                    cur->core.flag &= ~BAM_FPROPER_PAIR;
                }

                // Write out result
                if ( !remove_reads ) {
                    sam_write1(out, header, pre);
                    sam_write1(out, header, cur);
                } else {
                    // If we have to remove reads make sure we do it in a way that doesn't create orphans with bad flags
                    if(pre->core.flag&BAM_FUNMAP) cur->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
                    if(cur->core.flag&BAM_FUNMAP) pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
                    if(!(pre->core.flag&BAM_FUNMAP)) sam_write1(out, header, pre);
                    if(!(cur->core.flag&BAM_FUNMAP)) sam_write1(out, header, cur);
                }
                has_prev = 0;
            } else { // unpaired?  clear bad info and write it out
                if (pre->core.tid < 0 || pre->core.pos < 0 || pre->core.flag&BAM_FUNMAP) { // If unmapped
                    pre->core.flag |= BAM_FUNMAP;
                    pre->core.tid = -1;
                    pre->core.pos = -1;
                }
                pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
                pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
                if ( !remove_reads || !(pre->core.flag&BAM_FUNMAP) ) sam_write1(out, header, pre);
            }
        } else has_prev = 1;
        curr = 1 - curr;
        pre_end = cur_end;
    }
    if (has_prev && !remove_reads) { // If we still have a BAM in the buffer it must be unpaired
        bam1_t *pre = b[1-curr];
        if (pre->core.tid < 0 || pre->core.pos < 0 || pre->core.flag&BAM_FUNMAP) { // If unmapped
            pre->core.flag |= BAM_FUNMAP;
            pre->core.tid = -1;
            pre->core.pos = -1;
        }
        pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
        pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);

        sam_write1(out, header, pre);
    }
    bam_hdr_destroy(header);
    bam_destroy1(b[0]);
    bam_destroy1(b[1]);
    free(str.s);
}
Beispiel #21
0
int do_grep() {
#ifdef DEBUGa
	printf("[!]do_grep\n");
#endif
	BamInfo_t *pbam;
	kh_cstr_t BamID;
	khiter_t ki, bami;
	kstring_t ks1 = { 0, 0, NULL };
	kstring_t ks2 = { 0, 0, NULL };
	kstring_t ks3 = { 0, 0, NULL };
	kstring_t kstr = { 0, 0, NULL };
	//ksprintf(kstr, "%s/%s_grep/", myConfig.WorkDir, myConfig.ProjectID);
	//const char *filePrefix = strdup(ks_str(kstr));
	//kputs(myConfig.WorkDir,kstr);

	samFile *in;
	bam_hdr_t *h;
	hts_idx_t *idx;
	bam1_t *b, *d, *d2, *bR1, *bR2, *bR3;
	bR1 = bam_init1(); bR2 = bam_init1(); bR3 = bam_init1();
	htsFile *out;
	//hts_opt *in_opts = NULL, *out_opts = NULL;
	int r = 0, exit_code = 0;

	kvec_t(bam1_t) R1, R2, RV;
	pierCluster_t *pierCluster;
	//samdat_t tmp_samdat;
#ifdef DEBUGa
	kstr.l = 0;
	ksprintf(&kstr, "%s/%s_grep/Greped.dump", myConfig.WorkDir, myConfig.ProjectID);
	FILE *fsdump = fopen(ks_str(&kstr),"w");
#endif
	kstr.l = 0;
	ksprintf(&kstr, "%s/%s_grep/Greped.ini", myConfig.WorkDir, myConfig.ProjectID);
	FILE *fs = fopen(ks_str(&kstr),"w");
	uint32_t blockid = 0;

	for (bami = kh_begin(bamNFOp); bami != kh_end(bamNFOp); ++bami) {
		//printf(">[%d]:\n",bami);
		if (kh_exist(bamNFOp, bami)) {
			kv_init(R1); kv_init(R2); kv_init(RV);
			//tmp_samdat = (const samdat_t){ 0 };
			//memset(&tmp_samdat,0,sizeof(samdat_t));
			//printf("-[%d]:\n",bami);
			BamID = kh_key(bamNFOp, bami);
			pbam = &kh_value(bamNFOp, bami);
			fprintf(stderr, "%u [%s]=%s\t%u %u\n",bami,BamID,pbam->fileName,pbam->insertSize,pbam->SD);

			in = sam_open(pbam->fileName, "r");
			if (in == NULL) {
				fprintf(stderr, "[x]Error opening \"%s\"\n", pbam->fileName);
				return EXIT_FAILURE;
			}
			h = sam_hdr_read(in);
			kstr.l = 0;
			ksprintf(&kstr, "%s/%s_grep/%s.bam", myConfig.WorkDir, myConfig.ProjectID, BamID);
			out = hts_open(ks_str(&kstr), "wb");
			if (out == NULL) {
				fprintf(stderr, "[x]Error opening [%s]\n",ks_str(&kstr));
				return EXIT_FAILURE;
			}
			if (sam_hdr_write(out, h) < 0) {
				fprintf(stderr, "[!]Error writing output header.\n");
				exit_code = 1;
			}

			int8_t *ChrIsHum;
			if (h == NULL) {
				fprintf(stderr, "[x]Couldn't read header for \"%s\"\n", pbam->fileName);
				return EXIT_FAILURE;
			} else {
				ChrIsHum = (int8_t *) malloc(h->n_targets * sizeof(int8_t));
				for (int32_t i=0; i < h->n_targets; ++i) {
					//ChrIsHum[i] = -1;
					ki = kh_get(chrNFO, chrNFOp, h->target_name[i]);
					if (ki == kh_end(chrNFOp)) {
						errx(4,"[x]Cannot find ChrID for [%s] !",h->target_name[i]);
					} else {
						ChrInfo_t * tmp = &kh_value(chrNFOp, ki);
						ChrIsHum[i] = tmp->isHum;
						//printf(">>> %d Chr:%s %d\n",i,h->target_name[i],ChrIsHum[i]);
					}
				}
			}
			h->ignore_sam_err = 0;
			b = bam_init1();
			d = bam_init1();
			d2 = bam_init1();
			if ((idx = sam_index_load(in, pbam->fileName)) == 0) {
				fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
				return 1;
			}
			pierCluster = sam_plp_init();
			while ((r = sam_read1(in, h, b)) >= 0) {
				int8_t flag = false;
				const bam1_core_t *c = &b->core;
				if (c->qual < myConfig.minBamQual) {
					continue;
				}
				if (c->n_cigar) {
					uint32_t *cigar = bam_get_cigar(b);
					int i = c->n_cigar; --i;
					if ( (bam_cigar_opchr(cigar[0])=='S' && bam_cigar_oplen(cigar[0]) >= myConfig.minGrepSlen) || 
						 (bam_cigar_opchr(cigar[i])=='S' && bam_cigar_oplen(cigar[i]) >= myConfig.minGrepSlen)    ) {
						flag = true;
					}
/* We only need /\d+S/ on both terminal, NOT inside.
					for (int i = 0; i < c->n_cigar; ++i) {
						if (bam_cigar_opchr(cigar[i])=='S') {	// soft clipping
							if ( bam_cigar_oplen(cigar[i]) >= myConfig.minGrepSlen ) {
								flag = true;
							}
						}
					}
*/
				}
				if (flag && ChrIsHum[c->tid]) {	// Now, skip Virus items.
					//bam_copy1(bR1, b);
					flag = 0;	// recycle
					//int enoughMapQ = 0;
					//kstring_t ks = { 0, 0, NULL };
					/*if (sam_format1(h, b, &ks1) < 0) {
						fprintf(stderr, "Error writing output.\n");
						exit_code = 1;
						break;
					} else */if ((c->mtid == c->tid && ChrIsHum[c->tid]) || (ChrIsHum[c->tid] ^ ChrIsHum[c->mtid])) {	// Only grep those mapped on same Human ChrID, or diff species/一方在病毒的情况.
						//printf(">[%s]\n",ks_str(&ks1));
						flag |= 1;
						//tmp_samdat.b = bam_dup1(b);
						//kv_push(samdat_t,R1,tmp_samdat);
						/*if (checkMapQ(ChrIsHum, b, true)) {
							++enoughMapQ;
						}*/
					}
					if (getPairedSam(in, idx, b, d) != 0) {
						flag &= ~1;
						continue;
					} else {
						flag |= 2;
						/*if (checkMapQ(ChrIsHum, d, false)) {
							++enoughMapQ;
						}*/
						if (c->flag & BAM_FSECONDARY) {
							if (getPairedSam(in, idx, d, d2) == 0) {
								//sam_format1(h, d2, &ks3);
								flag |= 4;
								/*if (checkMapQ(ChrIsHum, d2, false)) {
									++enoughMapQ;
								}*/
							}
						}
					}
/*
对于 BAM_FSECONDARY(256) 的 Read,跳两次 与 读 SA 项,效果一样。
>[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90	353	chr2	13996555	0	50S40M	chr18	48245109	0ACACAACAATGTTCCGGAGACTCTAAGGCCTCCCGATACAGAGCAGAGGCCACACACACACACACCATGGAATACTATTCAGCCAAAAAA	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	NM:i:0	MD:Z:40	AS:i:40	XS:i:40	RG:Z:Fsimout_mB	SA:Z:rgi|59585|emb|X04615.1|,2000,-,40S46M4S,60,0;	YC:Z:CT	YD:Z:f]
-[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90	177	chr18	48245109	9	40S50M	gi|59585|emb|X04615.1|2000	0	GTTCCGGAGACTCTAAGGCCTCCCGATACAGAGCAGAGGCCACACACACACACACCATGGAATACTATTCAGCCAAAAAAAGGAATTCAA	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	NM:i:0	MD:Z:50	AS:i:50	XS:i:46	RG:Z:Fsimout_mB	SA:Z:rgi|59585|emb|X04615.1|,2000,+,50S40M,9,0;	YC:Z:GA	YD:Z:f]
+[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90	113	gi|59585|emb|X04615.1|	2000	60	40S46M4S	chr18	48245109	0	TTTTTTGGCTGAATAGTATTCCATGGTGTGTGTGTGTGTGGCCTCTGCTCTGTATCGGGAGGCCTTAGAGTCTCCGGAACATTGTTGTGT	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	NM:i:0	MD:Z:46	AS:i:46	XS:i:27	RG:Z:Fsimout_mB	SA:Z:fchr2,13996555,+,50S40M,0,0;	YC:Z:CT	YD:Z:r]
*/
					/*if (sam_format1(h, d, &ks2) < 0) {
						fprintf(stderr, "Error writing output.\n");
						exit_code = 1;
						break;
					}*/
					if (((flag & 3) == 3) /*&& enoughMapQ >= myConfig.samples*/) {
						/*printf(">%d[%s]\n",checkMapQ(ChrIsHum, b, true),ks_str(&ks1));
						printf("-%d[%s]\n",checkMapQ(ChrIsHum, d, false),ks_str(&ks2));
						if (flag & 4) {
							printf("+%d[%s]\n",checkMapQ(ChrIsHum, d2, false),ks_str(&ks3));
						}
						printf("<--%d\n",enoughMapQ);*/
						bam_aux_append(b, "Zd", 'Z', 2, (uint8_t*)"H");
						if (sam_plp_push(ChrIsHum, pierCluster, b) == 0) {
							//printf("--HumRange=%s:%d-%d\n", h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							if ((!ChrIsHum[(d->core).tid]) && (flag & 2)) {
								bam_aux_append(d, "Zd", 'Z', 2, (uint8_t*)"V");
								sam_plp_push(ChrIsHum, pierCluster, d);
							}
							if ((!ChrIsHum[(d2->core).tid]) && (flag & 4)) {
								bam_aux_append(d2, "Zd", 'Z', 2, (uint8_t*)"V");
								sam_plp_push(ChrIsHum, pierCluster, d2);
							}
						} else {
							++blockid;
							//print
#ifdef DEBUGa
							fprintf(fsdump,"[%u %s]\nHumRange=%s:%d-%d\n", blockid, BamID, h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							fprintf(fsdump,"VirRange=%s:%d-%d\n", h->target_name[(pierCluster->VirusRange).tid], (pierCluster->VirusRange).pos, (pierCluster->VirusRange).endpos);
#endif
							fprintf(fs,"[%u]\nBamID=%s\nHumRange=%s:%d-%d\n",blockid, BamID,
							h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							if ( (pierCluster->VirusRange).pos == 0 && (pierCluster->VirusRange).endpos == 0 ) {
								fprintf(fs,"VirRange=NA\n");
							} else {
								fprintf(fs,"VirRange=%s:%d-%d\n", h->target_name[(pierCluster->VirusRange).tid],
									(pierCluster->VirusRange).pos, (pierCluster->VirusRange).endpos);
							}
							for (size_t i=0; i<kv_size(pierCluster->Reads);++i) {
								bam1_t *bi = kv_A(pierCluster->Reads, i);
								bam_aux_append(bi, "Zc", 'i', sizeof(uint32_t), (uint8_t*)&blockid);
#ifdef DEBUGa
								if (sam_format1(h, bi, &ks1) < 0) {
									fprintf(stderr, "Error writing output.\n");
									exit_code = 1;
									break;
								} else {
									fprintf(fsdump,"%s\n",ks1.s);
								}
#endif
								if (sam_write1(out, h, bi) < 0) {
									fprintf(stderr, "[x]Error writing output.\n");
									exit_code = 1;
									break;
								}
							}
#ifdef DEBUGa
							fprintf(fsdump,"\n");
							printf("HumRange=%s:%d-%d\n", h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							//fflush(fs);
#endif
							sam_plp_dectroy(pierCluster);
							pierCluster = sam_plp_init();
						}
					}
				}
				//char *qname = bam_get_qname(b);
			}
			r = sam_close(out);   // stdout can only be closed once
			if (r < 0) {
				fprintf(stderr, "Error closing output.\n");
				exit_code = 1;
			}

			hts_idx_destroy(idx);
			bam_destroy1(b);
			bam_destroy1(d);
			bam_destroy1(d2);
			bam_hdr_destroy(h);
			r = sam_close(in);
			free(ChrIsHum);
#ifdef DEBUGa
			//fflush(NULL);
			//pressAnyKey();
#endif
			sam_plp_dectroy(pierCluster);
			//printf("<[%d]:\n",bami);
		}
	}
#ifdef DEBUGa
	fclose(fsdump);
#endif
	fclose(fs);
	getPairedSam(NULL, NULL, NULL, NULL);	// sam_close(fp2);
	//printf("---[%d]---\n",exit_code);
	bam_destroy1(bR1); bam_destroy1(bR2); bam_destroy1(bR3);
	ks_release(&ks1);
	ks_release(&ks2);
	ks_release(&ks3);
	ks_release(&kstr);
	//free((char*)filePrefix);
	return exit_code;
}
Beispiel #22
0
int main_samview(int argc, char *argv[])
{
	samFile *in;
	char *fn_ref = 0;
	int flag = 0, c, clevel = -1, ignore_sam_err = 0;
	char moder[8];
	bam_hdr_t *h;
	bam1_t *b;

	while ((c = getopt(argc, argv, "IbSl:t:")) >= 0) {
		switch (c) {
		case 'S': flag |= 1; break;
		case 'b': flag |= 2; break;
		case 'l': clevel = atoi(optarg); flag |= 2; break;
		case 't': fn_ref = optarg; break;
		case 'I': ignore_sam_err = 1; break;
		}
	}
	if (argc == optind) {
		fprintf(stderr, "Usage: samview [-bSI] [-l level] <in.bam>|<in.sam> [region]\n");
		return 1;
	}
	strcpy(moder, "r");
	if ((flag&1) == 0) strcat(moder, "b");

	in = sam_open(argv[optind], moder, fn_ref);
	h = sam_hdr_read(in);
	h->ignore_sam_err = ignore_sam_err;
	b = bam_init1();

	if ((flag&4) == 0) { // SAM/BAM output
		htsFile *out;
		char modew[8];
		strcpy(modew, "w");
		if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
		if (flag&2) strcat(modew, "b");
		out = hts_open("-", modew, 0);
		sam_hdr_write(out, h);
		if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region
			int i;
			hts_idx_t *idx;
			if ((idx = bam_index_load(argv[optind])) == 0) {
				fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
				return 1;
			}
			for (i = optind + 1; i < argc; ++i) {
				hts_itr_t *iter;
				if ((iter = bam_itr_querys(idx, h, argv[i])) == 0) {
					fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]);
					continue;
				}
				while (bam_itr_next((BGZF*)in->fp, iter, b) >= 0) sam_write1(out, h, b);
				hts_itr_destroy(iter);
			}
			hts_idx_destroy(idx);
		} else while (sam_read1(in, h, b) >= 0) sam_write1(out, h, b);
		sam_close(out);
	}

	bam_destroy1(b);
	bam_hdr_destroy(h);
	sam_close(in);
	return 0;
}
Beispiel #23
0
int main(int argc, char **argv) {
    htsFile *in = NULL;
    htsFile *out = NULL;
    char *in_name = "-";
    char *out_name = "-";
    char *ref_name = NULL;
    char *ref_seq = NULL;
    char modew[8] = "w";
    faidx_t *fai = NULL;
    bam_hdr_t *hdr = NULL;
    bam1_t *rec = NULL;
    int c, res, last_ref = -1, ref_len = 0;
    int adjust = 0, extended = 0, recalc = 0, flags = 0;

    while ((c = getopt(argc, argv, "aef:hi:o:r")) >= 0) {
        switch (c) {
        case 'a': adjust = 1; break;
        case 'e': extended = 1; break;
        case 'f': ref_name = optarg; break;
        case 'h': usage(argv[0]); return EXIT_SUCCESS;
        case 'i': in_name = optarg; break;
        case 'o': out_name = optarg; break;
        case 'r': recalc = 1; break;
        default: usage(argv[0]); return EXIT_FAILURE;
        }
    }

    if (!ref_name) {
        usage(argv[0]);
        return EXIT_FAILURE;
    }

    flags = (adjust ? 1 : 0) | (extended ? 2 : 0) | (recalc ? 4 : 0);

    fai = fai_load(ref_name);
    if (!fai) {
        fprintf(stderr, "Couldn't load reference %s\n", ref_name);
        goto fail;
    }

    rec = bam_init1();
    if (!rec) {
        perror(NULL);
        goto fail;
    }

    in = hts_open(in_name, "r");
    if (!in) {
        fprintf(stderr, "Couldn't open %s : %s\n", in_name, strerror(errno));
        goto fail;
    }

    hdr = sam_hdr_read(in);
    if (!hdr) {
        fprintf(stderr, "Couldn't read header for %s\n", in_name);
        goto fail;
    }

    out = hts_open(out_name, modew);
    if (!out) {
        fprintf(stderr, "Couldn't open %s : %s\n", out_name, strerror(errno));
        goto fail;
    }

    if (sam_hdr_write(out, hdr) < 0) {
        fprintf(stderr, "Couldn't write header to %s : %s\n",
                out_name, strerror(errno));
        goto fail;
    }

    while ((res = sam_read1(in, hdr, rec)) >= 0) {
        if (rec->core.tid >= hdr->n_targets) {
            fprintf(stderr, "Invalid BAM reference id %d\n", rec->core.tid);
            goto fail;
        }
        if (last_ref != rec->core.tid && rec->core.tid >= 0) {
            free(ref_seq);
            ref_seq = faidx_fetch_seq(fai, hdr->target_name[rec->core.tid],
                                      0, INT_MAX, &ref_len);
            if (!ref_seq) {
                fprintf(stderr, "Couldn't get reference %s\n",
                        hdr->target_name[rec->core.tid]);
                goto fail;
            }
            last_ref = rec->core.tid;
        }
        if (rec->core.tid >= 0) {
            res = sam_prob_realn(rec, ref_seq, ref_len, flags);
            if (res <= -4) {
                fprintf(stderr, "Error running sam_prob_realn : %s\n",
                        strerror(errno));
                goto fail;
            }
        }
        if (sam_write1(out, hdr, rec) < 0) {
            fprintf(stderr, "Error writing to %s\n", out_name);
            goto fail;
        }
    }
    res = hts_close(in);
    in = NULL;
    if (res < 0) {
        fprintf(stderr, "Error closing %s\n", in_name);
        goto fail;
    }

    res = hts_close(out);
    out = NULL;
    if (res < 0) {
        fprintf(stderr, "Error closing %s\n", out_name);
        goto fail;
    }

    bam_hdr_destroy(hdr);
    bam_destroy1(rec);
    free(ref_seq);
    fai_destroy(fai);

    return EXIT_SUCCESS;

 fail:
    if (hdr) bam_hdr_destroy(hdr);
    if (rec) bam_destroy1(rec);
    if (in) hts_close(in);
    if (out) hts_close(out);
    free(ref_seq);
    fai_destroy(fai);
    return EXIT_FAILURE;
}
Beispiel #24
0
/*!
  @abstract    Merge multiple sorted BAM.
  @param  is_by_qname whether to sort by query name
  @param  out         output BAM file name
  @param  mode        sam_open() mode to be used to create the final output file
                      (overrides level settings from UNCOMP and LEVEL1 flags)
  @param  headers     name of SAM file from which to copy '@' header lines,
                      or NULL to copy them from the first file to be merged
  @param  n           number of files to be merged
  @param  fn          names of files to be merged
  @param  flag        flags that control how the merge is undertaken
  @param  reg         region to merge
  @param  n_threads   number of threads to use (passed to htslib)
  @discussion Padding information may NOT correctly maintained. This
  function is NOT thread safe.
 */
int bam_merge_core2(int by_qname, const char *out, const char *mode, const char *headers, int n, char * const *fn, int flag, const char *reg, int n_threads)
{
    samFile *fpout, **fp;
    heap1_t *heap;
    bam_hdr_t *hout = NULL;
    int i, j, *RG_len = NULL;
    uint64_t idx = 0;
    char **RG = NULL;
    hts_itr_t **iter = NULL;
    bam_hdr_t **hdr = NULL;
    trans_tbl_t *translation_tbl = NULL;

    // Is there a specified pre-prepared header to use for output?
    if (headers) {
        samFile* fpheaders = sam_open(headers, "r");
        if (fpheaders == NULL) {
            const char *message = strerror(errno);
            fprintf(pysamerr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
            return -1;
        }
        hout = sam_hdr_read(fpheaders);
        sam_close(fpheaders);
    }

    g_is_by_qname = by_qname;
    fp = (samFile**)calloc(n, sizeof(samFile*));
    heap = (heap1_t*)calloc(n, sizeof(heap1_t));
    iter = (hts_itr_t**)calloc(n, sizeof(hts_itr_t*));
    hdr = (bam_hdr_t**)calloc(n, sizeof(bam_hdr_t*));
    translation_tbl = (trans_tbl_t*)calloc(n, sizeof(trans_tbl_t));
    // prepare RG tag from file names
    if (flag & MERGE_RG) {
        RG = (char**)calloc(n, sizeof(char*));
        RG_len = (int*)calloc(n, sizeof(int));
        for (i = 0; i != n; ++i) {
            int l = strlen(fn[i]);
            const char *s = fn[i];
            if (l > 4 && strcmp(s + l - 4, ".bam") == 0) l -= 4;
            for (j = l - 1; j >= 0; --j) if (s[j] == '/') break;
            ++j; l -= j;
            RG[i] = (char*)calloc(l + 1, 1);
            RG_len[i] = l;
            strncpy(RG[i], s + j, l);
        }
    }
    // open and read the header from each file
    for (i = 0; i < n; ++i) {
        bam_hdr_t *hin;
        fp[i] = sam_open(fn[i], "r");
        if (fp[i] == NULL) {
            int j;
            fprintf(pysamerr, "[bam_merge_core] fail to open file %s\n", fn[i]);
            for (j = 0; j < i; ++j) sam_close(fp[j]);
            free(fp); free(heap);
            // FIXME: possible memory leak
            return -1;
        }
        hin = sam_hdr_read(fp[i]);
        if (hout)
            trans_tbl_init(hout, hin, translation_tbl+i, flag & MERGE_COMBINE_RG, flag & MERGE_COMBINE_PG);
        else {
            // As yet, no headers to merge into...
            hout = bam_hdr_dup(hin);
            // ...so no need to translate header into itself
            trans_tbl_init(hout, hin, translation_tbl+i, true, true);
        }

        // TODO sam_itr_next() doesn't yet work for SAM files,
        // so for those keep the headers around for use with sam_read1()
        if (hts_get_format(fp[i])->format == sam) hdr[i] = hin;
        else { bam_hdr_destroy(hin); hdr[i] = NULL; }

        if ((translation_tbl+i)->lost_coord_sort && !by_qname) {
            fprintf(pysamerr, "[bam_merge_core] Order of targets in file %s caused coordinate sort to be lost\n", fn[i]);
        }
    }

    // Transform the header into standard form
    pretty_header(&hout->text,hout->l_text);

    // If we're only merging a specified region move our iters to start at that point
    if (reg) {
        int* rtrans = rtrans_build(n, hout->n_targets, translation_tbl);

        int tid, beg, end;
        const char *name_lim = hts_parse_reg(reg, &beg, &end);
        char *name = malloc(name_lim - reg + 1);
        memcpy(name, reg, name_lim - reg);
        name[name_lim - reg] = '\0';
        tid = bam_name2id(hout, name);
        free(name);
        if (tid < 0) {
            fprintf(pysamerr, "[%s] Malformated region string or undefined reference name\n", __func__);
            return -1;
        }
        for (i = 0; i < n; ++i) {
            hts_idx_t *idx = sam_index_load(fp[i], fn[i]);
            // (rtrans[i*n+tid]) Look up what hout tid translates to in input tid space
            int mapped_tid = rtrans[i*hout->n_targets+tid];
            if (mapped_tid != INT32_MIN) {
                iter[i] = sam_itr_queryi(idx, mapped_tid, beg, end);
            } else {
                iter[i] = sam_itr_queryi(idx, HTS_IDX_NONE, 0, 0);
            }
            hts_idx_destroy(idx);
            if (iter[i] == NULL) break;
        }
        free(rtrans);
    } else {
        for (i = 0; i < n; ++i) {
            if (hdr[i] == NULL) {
                iter[i] = sam_itr_queryi(NULL, HTS_IDX_REST, 0, 0);
                if (iter[i] == NULL) break;
            }
            else iter[i] = NULL;
        }
    }

    if (i < n) {
        fprintf(pysamerr, "[%s] Memory allocation failed\n", __func__);
        return -1;
    }

    // Load the first read from each file into the heap
    for (i = 0; i < n; ++i) {
        heap1_t *h = heap + i;
        h->i = i;
        h->b = bam_init1();
        if ((iter[i]? sam_itr_next(fp[i], iter[i], h->b) : sam_read1(fp[i], hdr[i], h->b)) >= 0) {
            bam_translate(h->b, translation_tbl + i);
            h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam_is_rev(h->b);
            h->idx = idx++;
        }
        else {
            h->pos = HEAP_EMPTY;
            bam_destroy1(h->b);
            h->b = NULL;
        }
    }

    // Open output file and write header
    if ((fpout = sam_open(out, mode)) == 0) {
        fprintf(pysamerr, "[%s] fail to create the output file.\n", __func__);
        return -1;
    }
    sam_hdr_write(fpout, hout);
    if (!(flag & MERGE_UNCOMP)) hts_set_threads(fpout, n_threads);

    // Begin the actual merge
    ks_heapmake(heap, n, heap);
    while (heap->pos != HEAP_EMPTY) {
        bam1_t *b = heap->b;
        if (flag & MERGE_RG) {
            uint8_t *rg = bam_aux_get(b, "RG");
            if (rg) bam_aux_del(b, rg);
            bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
        }
        sam_write1(fpout, hout, b);
        if ((j = (iter[heap->i]? sam_itr_next(fp[heap->i], iter[heap->i], b) : sam_read1(fp[heap->i], hdr[heap->i], b))) >= 0) {
            bam_translate(b, translation_tbl + heap->i);
            heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam_is_rev(b);
            heap->idx = idx++;
        } else if (j == -1) {
            heap->pos = HEAP_EMPTY;
            bam_destroy1(heap->b);
            heap->b = NULL;
        } else fprintf(pysamerr, "[bam_merge_core] '%s' is truncated. Continue anyway.\n", fn[heap->i]);
        ks_heapadjust(heap, 0, n, heap);
    }

    // Clean up and close
    if (flag & MERGE_RG) {
        for (i = 0; i != n; ++i) free(RG[i]);
        free(RG); free(RG_len);
    }
    for (i = 0; i < n; ++i) {
        trans_tbl_destroy(translation_tbl + i);
        hts_itr_destroy(iter[i]);
        bam_hdr_destroy(hdr[i]);
        sam_close(fp[i]);
    }
    bam_hdr_destroy(hout);
    sam_close(fpout);
    free(translation_tbl); free(fp); free(heap); free(iter); free(hdr);
    return 0;
}
Beispiel #25
0
/******************************************************************************
*
*   The main worker node function.
*
*   int thread_id: the thread_id
*   char *fastq1: FIFO from which bowtie2 can get read1
*   char *fastq2: FIFO from which bowtie2 can get read2 (if it exists)
*
*******************************************************************************/
void herd_worker_node(int thread_id, char *fastq1, char *fastq2) {
    int cmd_length = 1, max_qname = 0, status, strand;
    char *cmd, *last_qname = calloc(1, sizeof(char));
    MPI_Header *packed_header;
    MPI_read *packed_read = calloc(1, sizeof(MPI_read));
    bam_hdr_t *header;
    bam1_t *read1 = bam_init1();
    bam1_t *read2 = bam_init1();
    samFile *fp;
#ifdef DEBUG
    MPI_Status stat;
    int current_p_size = 100;
    htsFile *of;
    bam_hdr_t *debug_header = bam_hdr_init();
    bam1_t *debug_read = bam_init1();
    global_header = bam_hdr_init();
    void *p = calloc(100,1);
    char *oname = NULL;
#else
    int i = 0;
#endif
    time_t t0, t1;
    int swapped = 0;
    assert(last_qname);
    assert(packed_read);

    //Which strand should we be aligning to?
    if(config.directional) {
        strand = (thread_id-1) % 2;
    } else {
        strand = (thread_id-1) % 4;
    }

    packed_read->size = 0;
    packed_read->packed = NULL;

    //construct the bowtie2 command
    cmd_length += (int) strlen("bowtie2 -q --reorder") + 1;
    cmd_length += (int) strlen(config.bowtie2_options) + 1;
    cmd_length += (int) strlen("--norc -x") + 1;
    cmd_length += (int) strlen(config.genome_dir) + strlen("bisulfite_genome/CT_conversion/BS_CT") + 1;
    cmd_length += (int) 2*(strlen("-1 ") + strlen(fastq1)) + 3;
    if(config.paired) cmd_length += (int) strlen(fastq2); //This is likely unneeded.

#ifdef DEBUG
    oname = malloc(sizeof(char) *(1+strlen(config.odir)+strlen(config.basename)+strlen("_X.bam")));
    assert(oname);
    sprintf(oname, "%s%s_%i.bam", config.odir, config.basename, thread_id);
    if(!config.quiet) fprintf(stderr, "Writing output to %s\n", oname);
    of = sam_open(oname, "wb");
    free(oname);
#endif

    cmd = (char *) malloc(sizeof(char) * cmd_length);
    assert(cmd);
    if(strand == 0) { //OT Read#1 C->T, Read#2 G->A, Genome C->T only the + strand
        if(config.paired) {
            sprintf(cmd, "bowtie2 -q --reorder %s --norc -x %sbisulfite_genome/CT_conversion/BS_CT -1 %s -2 %s", config.bowtie2_options, config.genome_dir, fastq1, fastq2);
        } else {
            sprintf(cmd, "bowtie2 -q --reorder %s --norc -x %sbisulfite_genome/CT_conversion/BS_CT -U %s", config.bowtie2_options, config.genome_dir, fastq1);
        }
    } else if(strand == 1) { //OB Read#1 C->T, Read#2 G->A, Genome G->A only the - strand
        if(config.paired) {
            sprintf(cmd, "bowtie2 -q --reorder %s --nofw -x %sbisulfite_genome/GA_conversion/BS_GA -1 %s -2 %s", config.bowtie2_options, config.genome_dir, fastq1, fastq2);
        } else {
            sprintf(cmd, "bowtie2 -q --reorder %s --nofw -x %sbisulfite_genome/GA_conversion/BS_GA -U %s", config.bowtie2_options, config.genome_dir, fastq1);
        }
    } else if(strand == 2) { //CTOT Read#1 G->A, Read#2 C->T, Genome C->T, only the - strand
        if(config.paired) {
            sprintf(cmd, "bowtie2 -q --reorder %s --nofw -x %sbisulfite_genome/CT_conversion/BS_CT -1 %s -2 %s", config.bowtie2_options, config.genome_dir, fastq1, fastq2);
        } else {
            sprintf(cmd, "bowtie2 -q --reorder %s --nofw -x %sbisulfite_genome/CT_conversion/BS_CT -U %s", config.bowtie2_options, config.genome_dir, fastq1);
        }
    } else if(strand == 3) { //CTOB Read#1 G->A, Read#2 C->T, Genome G->A, only the + strand
        if(config.paired) {
            sprintf(cmd, "bowtie2 -q --reorder %s --norc -x %sbisulfite_genome/GA_conversion/BS_GA -1 %s -2 %s", config.bowtie2_options, config.genome_dir, fastq1, fastq2);
        } else {
            sprintf(cmd, "bowtie2 -q --reorder %s --norc -x %sbisulfite_genome/GA_conversion/BS_GA -U %s", config.bowtie2_options, config.genome_dir, fastq1);
        }
    } else {
        fprintf(stderr, "Oh shit, got strand %i!\n", strand);
        return;
    }

    //Start the process
    if(!config.quiet) fprintf(stderr, "Node %i executing: %s\n", thread_id, cmd); fflush(stderr);
    fp = sam_popen(cmd);
    header = sam_hdr_read(fp);
#ifdef DEBUG
    sam_hdr_write(of, header);
#endif

#ifndef DEBUG
    packed_header = pack_header(header);
    if(thread_id == 1) {
        //Send the header
        MPI_Send((void *) &(packed_header->size), 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
        status = MPI_Send((void *) packed_header->packed, packed_header->size, MPI_BYTE, 0, 2, MPI_COMM_WORLD);
        if(status != MPI_SUCCESS) {
            fprintf(stderr, "MPI_Send returned %i\n", status);
            fflush(stderr);
        }
    }
#else
    packed_header = pack_header(header);
    void *tmp_pointer = malloc(packed_header->size);
    assert(tmp_pointer);
    MPI_Request request;
    MPI_Isend((void *) packed_header->packed, packed_header->size, MPI_BYTE, 0, 2, MPI_COMM_WORLD, &request);
    status = MPI_Recv(tmp_pointer, packed_header->size, MPI_BYTE, 0, 2, MPI_COMM_WORLD, &stat);
    if(status != MPI_SUCCESS) fprintf(stderr, "We seem to have not been able to send the message to ourselves!\n");
    MPI_Wait(&request, &stat);
    unpack_header(debug_header, tmp_pointer);
    global_header = debug_header;
    free(tmp_pointer);
#endif

    t0 = time(NULL);
    if(!config.quiet) fprintf(stderr, "Node %i began sending reads @%s", thread_id, ctime(&t0)); fflush(stderr);
    while(sam_read1(fp, header, read1) >= 0) {
#ifdef DEBUG
        sam_write1(of, global_header, read1);
#endif
        if(strcmp(bam_get_qname(read1), last_qname) == 0) { //Multimapper
            if(config.paired) {
                sam_read1(fp, header, read2);
#ifdef DEBUG
                sam_write1(of, global_header, read2);
#endif
            }
            continue;
        } else {
            if(read1->core.l_qname > max_qname) {
                max_qname = read1->core.l_qname + 10;
                last_qname = realloc(last_qname, sizeof(char) * max_qname);
                assert(last_qname);
            }
            strcpy(last_qname, bam_get_qname(read1));
        }

        //Are paired-end reads in the wrong order?
        swapped = 0;
        if(config.paired) {
            if(read1->core.flag & BAM_FREAD2) {
                swapped = 1;
                sam_read1(fp, header, read2);
                packed_read = pack_read(read2, packed_read);
#ifndef DEBUG
                MPI_Send((void *) packed_read->packed, packed_read->size, MPI_BYTE, 0, 5, MPI_COMM_WORLD);
#else
                sam_write1(of, global_header, read2);
                if(packed_read->size > current_p_size) {
                    p = realloc(p, packed_read->size);
                    assert(p);
                }
                MPI_Isend((void *) packed_read->packed, packed_read->size, MPI_BYTE, 0, 5, MPI_COMM_WORLD, &request);
                status = MPI_Recv(p, packed_read->size, MPI_BYTE, 0, 5, MPI_COMM_WORLD, &stat);
                MPI_Wait(&request, &stat);
                debug_read = unpack_read(debug_read, p);
#endif
            }
        }

        //Send the read
        packed_read = pack_read(read1, packed_read);
#ifndef DEBUG
        MPI_Send((void *) packed_read->packed, packed_read->size, MPI_BYTE, 0, 5, MPI_COMM_WORLD);
#else
        if(packed_read->size > current_p_size) {
            p = realloc(p, packed_read->size);
            assert(p);
        }
        MPI_Isend(packed_read->packed, packed_read->size, MPI_BYTE, 0, 5, MPI_COMM_WORLD, &request);
        status = MPI_Recv(p, packed_header->size, MPI_BYTE, 0, 5, MPI_COMM_WORLD, &stat);
        MPI_Wait(&request, &stat);
#endif
        //Deal with paired-end reads
        if(config.paired && !swapped) {
            sam_read1(fp, header, read2);
            packed_read = pack_read(read2, packed_read);
#ifndef DEBUG
            MPI_Send((void *) packed_read->packed, packed_read->size, MPI_BYTE, 0, 5, MPI_COMM_WORLD);
#else
            sam_write1(of, global_header, read2);
            if(packed_read->size > current_p_size) {
                p = realloc(p, packed_read->size);
                assert(p);
            }
            MPI_Isend((void *) packed_read->packed, packed_read->size, MPI_BYTE, 0, 5, MPI_COMM_WORLD, &request);
            status = MPI_Recv(p, packed_header->size, MPI_BYTE, 0, 5, MPI_COMM_WORLD, &stat);
            MPI_Wait(&request, &stat);
            debug_read = unpack_read(debug_read, p);
#endif
        }
#ifndef DEBUG
        i++;
#endif
    }
    t1 = time(NULL);
    if(!config.quiet) fprintf(stderr, "Node %i finished sending reads @%s\t(%f sec elapsed)\n", thread_id, ctime(&t1), difftime(t1, t0)); fflush(stderr);

    //Notify the master node
    packed_read->size = 0;
#ifndef DEBUG
    void *A = malloc(1);
    assert(A);
    MPI_Send(A, 1, MPI_BYTE, 0, 5, MPI_COMM_WORLD);
    free(A);
#endif

    //Close things up
    bam_hdr_destroy(header);
    bam_destroy1(read1);
    bam_destroy1(read2);
    free(cmd);
    if(packed_read->packed != NULL) free(packed_read->packed);
    free(packed_read);
    if(packed_header->packed != NULL) free(packed_header->packed);
    free(packed_header);
    free(last_qname);
    sam_pclose(fp);
    //Remove the FIFO(s)
    unlink(fastq1);
    if(config.paired) unlink(fastq2);
#ifdef DEBUG
    sam_close(of);
    bam_hdr_destroy(debug_header);
    bam_destroy1(debug_read);
    free(p);
#endif
    if(!config.quiet) fprintf(stderr, "Exiting worker node %i\n", thread_id); fflush(stderr);
};