Example #1
0
/*
 * Load read-group names from a text file into settings->rghash.
 *
 * The file is read as a sequence of whitespace-separated tokens (at most
 * 1023 bytes each; longer tokens are split by the fscanf width limit).
 * The hash is created on first use.
 *
 * subcmd   - subcommand name forwarded to print_error_errno().
 * settings - view settings whose rghash member is created and/or extended.
 * fn       - path of the file to read.
 *
 * Returns 0 on success, -1 on failure (the error has already been reported).
 */
static int add_read_groups_file(const char *subcmd, samview_settings_t *settings, char *fn)
{
    FILE *fp;
    char buf[1024];
    int ret = 0;
    if (settings->rghash == NULL) {
        settings->rghash = kh_init(rg);
        if (settings->rghash == NULL) {
            perror(NULL);
            return -1;
        }
    }

    fp = fopen(fn, "r");
    if (fp == NULL) {
        print_error_errno(subcmd, "failed to open \"%s\" for reading", fn);
        return -1;
    }

    while (ret != -1 && !feof(fp) && fscanf(fp, "%1023s", buf) > 0) {
        char *d = strdup(buf);
        if (d != NULL) {
            kh_put(rg, settings->rghash, d, &ret);
            /* ret == 0: key already present (duplicate);
             * ret == -1: insertion failed.
             * Either way the table did not take ownership of d, so release
             * it here instead of leaking it. */
            if (ret <= 0) free(d);
        } else {
            ret = -1;
        }
    }
    if (ferror(fp)) ret = -1;
    if (ret == -1) {
        print_error_errno(subcmd, "failed to read \"%s\"", fn);
    }
    fclose(fp);
    return (ret != -1) ? 0 : -1;
}
Example #2
0
/*
 * Copy every record from state->input_file to state->output_file, passing
 * each one through state->mode_func first.
 *
 * The output header is written before any record.  Returns true when the
 * input was consumed up to a clean end of file (sam_read1() returned -1),
 * false on any header-write, allocation, read or write failure; the failure
 * has been reported through print_error_errno() before returning.
 */
static bool readgroupise(state_t* state)
{
    if (sam_hdr_write(state->output_file, state->output_header) != 0) {
        print_error_errno("addreplacerg", "[%s] Could not write header to output file", __func__);
        return false;
    }

    bam1_t* file_read = bam_init1();
    if (file_read == NULL) {
        /* Without this check a failed allocation would be handed straight
         * to sam_read1(). */
        print_error_errno("addreplacerg", "[%s] Could not allocate memory for alignment record", __func__);
        return false;
    }

    int ret;
    while ((ret = sam_read1(state->input_file, state->input_header, file_read)) >= 0) {
        state->mode_func(state, file_read);

        if (sam_write1(state->output_file, state->output_header, file_read) < 0) {
            print_error_errno("addreplacerg", "[%s] Could not write read to output file", __func__);
            bam_destroy1(file_read);
            return false;
        }
    }
    bam_destroy1(file_read);

    /* -1 is the normal end-of-file result; anything else is a read error. */
    if (ret != -1) {
        print_error_errno("addreplacerg", "[%s] Error reading from input file", __func__);
        return false;
    } else {
        return true;
    }
}
Example #3
0
/*
 * Write one record with sam_write1() and report any failure.
 *
 * fname is the output name used in the error message; NULL means the
 * message refers to standard output.  On failure *retp is set to
 * EXIT_FAILURE.  The sam_write1() result is returned unchanged.
 */
static inline int check_sam_write1(samFile *fp, const bam_hdr_t *h, const bam1_t *b, const char *fname, int *retp)
{
    const int written = sam_write1(fp, h, b);

    if (written < 0) {
        if (fname == NULL) {
            print_error_errno("view", "writing to standard output failed");
        } else {
            print_error_errno("view", "writing to \"%s\" failed", fname);
        }
        *retp = EXIT_FAILURE;
    }

    return written;
}
Example #4
0
/*
 * Release everything held by a bam2fq state object, then the object itself.
 *
 * The header is destroyed, the input is closed via check_sam_close() (which
 * receives `status`), the singleton stream is closed if one is open, and
 * each of the three fpr[] streams is closed unless it is stdout.
 *
 * Returns false if any fclose() call reported an error, true otherwise.
 */
static bool destroy_state(const bam2fq_opts_t *opts, bam2fq_state_t *state, int* status)
{
    bool all_closed = true;
    int idx;

    bam_hdr_destroy(state->h);
    check_sam_close("bam2fq", state->fp, opts->fn_input, "file", status);

    if (state->fpse) {
        if (fclose(state->fpse)) {
            print_error_errno("bam2fq", "Error closing singleton file \"%s\"", opts->fnse);
            all_closed = false;
        }
    }

    for (idx = 0; idx < 3; idx++) {
        /* stdout is shared with the rest of the process; never close it here. */
        if (state->fpr[idx] == stdout)
            continue;
        if (fclose(state->fpr[idx])) {
            print_error_errno("bam2fq", "Error closing r%d file \"%s\"", idx, opts->fnr[idx]);
            all_closed = false;
        }
    }

    free(state);
    return all_closed;
}
Example #5
0
/*
** Accept one pending connection on the listening socket s_sockfd.
**
** When `sockets` is NULL the accepted descriptor is closed straight away and
** EXIT_SUCCESS is returned (or the print_error_errno("close") result if the
** close fails).  Otherwise a single bool `true` is written to the client and
** the result of srv_init_socket() is returned.
** If accept() itself fails, the negated print_error_errno("accept") result
** is returned.
*/
static int		accept_new_client(t_srv_socket sockets[], int s_sockfd)
{
  bool const		connected_flag = true;
  struct sockaddr_in	peer_addr;
  socklen_t		peer_addr_len;
  int			peer_fd;

  peer_addr_len = sizeof(peer_addr);
  peer_fd = accept(s_sockfd, (struct sockaddr *)&peer_addr, &peer_addr_len);
  if (peer_fd == SYSERR)
    return (-print_error_errno("accept"));
  if (sockets)
    {
      write(peer_fd, &connected_flag, sizeof(bool));
      return (srv_init_socket(sockets, CLIENT, peer_fd, &peer_addr));
    }
  if (close(peer_fd) == SYSERR)
    return (print_error_errno("close"));
  return (EXIT_SUCCESS);
}
Example #6
0
/*
 * Check the files are consistent and capable of being concatenated.
 * Also fills out the rg2id read-group hash and the version numbers
 * and produces a new bam_hdr_t structure with merged RG lines.
 * Note it is only a simple merge, as we lack the niceties of a proper
 * header API.
 *
 * nfn, fn     - number of input files and their names.
 * h           - optional header to start from; when NULL the header of the
 *               first input file is duplicated instead.
 * rg2id       - receives the merged read-group hash.
 * vers_maj_p,
 * vers_min_p  - receive the CRAM major/minor version shared by all inputs
 *               (only written on success).
 *
 * On failure everything acquired inside this function (the open input file,
 * its header and hash, and the partially built header) is released.
 * NOTE(review): *rg2id is left untouched on failure, as before; confirm the
 * caller releases it.
 *
 * Returns updated header on success;
 *        NULL on failure.
 */
static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t *h,
                                     khash_s2i **rg2id, int *vers_maj_p, int *vers_min_p) {
    int i, vers_maj = -1, vers_min = -1;
    bam_hdr_t *new_h = NULL, *old = NULL;
    samFile *in = NULL;
    khash_s2i *rg2id_in = NULL;

    if (h) {
        new_h = bam_hdr_dup(h);
        if (!new_h) {
            fprintf(pysam_stderr, "[%s] ERROR: couldn't duplicate header.\n", __func__);
            goto fail;
        }
        *rg2id = hash_rg(new_h);
        if (!*rg2id) {
            fprintf(pysam_stderr, "[%s] ERROR: couldn't build @RG hash.\n", __func__);
            goto fail;
        }
    }

    for (i = 0; i < nfn; ++i) {
        cram_fd *in_c;
        khint_t ki;
        int new_rg = -1;

        in = sam_open(fn[i], "rc");
        if (in == 0) {
            print_error_errno("cat", "fail to open file '%s'", fn[i]);
            goto fail;
        }
        in_c = in->fp.cram;

        int vmaj = cram_major_vers(in_c);
        int vmin = cram_minor_vers(in_c);
        if ((vers_maj != -1 && vers_maj != vmaj) ||
            (vers_min != -1 && vers_min != vmin)) {
            fprintf(pysam_stderr, "[%s] ERROR: input files have differing version numbers.\n",
                    __func__);
            goto fail;
        }
        vers_maj = vmaj;
        vers_min = vmin;

        old = sam_hdr_read(in);
        if (!old) {
            fprintf(pysam_stderr, "[%s] ERROR: couldn't read header for '%s'.\n",
                    __func__, fn[i]);
            goto fail;
        }
        rg2id_in = hash_rg(old);
        if (!rg2id_in) {
            fprintf(pysam_stderr, "[%s] ERROR: couldn't build @RG hash for '%s'.\n",
                    __func__, fn[i]);
            goto fail;
        }

        if (!new_h) {
            new_h = bam_hdr_dup(old);
            if (!new_h) {
                fprintf(pysam_stderr, "[%s] ERROR: couldn't duplicate header.\n", __func__);
                goto fail;
            }
            *rg2id = hash_rg(new_h);
            if (!*rg2id) {
                fprintf(pysam_stderr, "[%s] ERROR: couldn't build @RG hash.\n", __func__);
                goto fail;
            }
        }

        // Add any existing @RG entries to our global @RG hash.
        for (ki = 0; ki < rg2id_in->n_id; ki++) {
            int added;

            new_rg = hash_s2i_inc(*rg2id, rg2id_in->id[ki], rg2id_in->line[ki], &added);

            if (added) {
                // Also add to new_h
                const char *line = rg2id_in->line[ki];
                const char *line_end = line;
                // Advance to just past the newline, or stop at the
                // terminator if the line has none.  (Testing *line here
                // would never stop on an unterminated line.)
                while (*line_end && *line_end++ != '\n')
                    ;
                size_t add_len = line_end - line;
                size_t old_len = new_h->l_text;

                // Keep the old buffer valid if realloc fails.
                char *text = realloc(new_h->text, old_len + add_len + 1);
                if (!text) {
                    fprintf(pysam_stderr, "[%s] ERROR: out of memory extending header.\n",
                            __func__);
                    goto fail;
                }
                memcpy(text + old_len, line, add_len);
                text[old_len + add_len] = '\0';
                new_h->text = text;
                new_h->l_text += add_len;
            }

            if (new_rg != ki && rg2id_in->n_id > 1) {
                fprintf(pysam_stderr, "[%s] ERROR: Same size @RG lists but differing order / contents\n",
                        __func__);
                goto fail;
            }
        }

        hash_s2i_free(rg2id_in);
        rg2id_in = NULL;
        bam_hdr_destroy(old);
        old = NULL;
        sam_close(in);
        in = NULL;
    }

    *vers_maj_p = vers_maj;
    *vers_min_p = vers_min;

    return new_h;

 fail:
    if (rg2id_in) hash_s2i_free(rg2id_in);
    if (old) bam_hdr_destroy(old);
    if (in) sam_close(in);
    if (new_h) bam_hdr_destroy(new_h);
    return NULL;
}
Example #7
0
/*
 * Command entry point (its error messages identify it as "calmd").
 *
 * Reads alignments from the first positional argument, optionally
 * realigns / caps mapping quality, calls bam_fillmd1_core() on each mapped
 * record for which a reference sequence is available, and writes every
 * record to "-" (standard output).
 *
 * The reference comes from the second positional argument if present,
 * otherwise from the global reference option (ga.reference).
 *
 * Returns 0 on success, 1 on any failure, or the result of calmd_usage()
 * when the command line is invalid.
 */
int bam_fillmd(int argc, char *argv[])
{
    int c, flt_flag, tid = -2, ret, len, is_bam_out, is_uncompressed, max_nm, is_realn, capQ, baq_flag;
    samFile *fp = NULL, *fpout = NULL;
    bam_hdr_t *header = NULL;
    faidx_t *fai = NULL;
    char *ref = NULL, mode_w[8], *ref_file;
    bam1_t *b = NULL;
    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;

    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
        { NULL, 0, NULL, 0 }
    };

    // Both NM and MD updates are enabled by default; -N clears them.
    flt_flag = UPDATE_NM | UPDATE_MD;
    is_bam_out = is_uncompressed = is_realn = max_nm = capQ = baq_flag = 0;
    strcpy(mode_w, "w");
    while ((c = getopt_long(argc, argv, "EqreuNhbSC:n:Ad", lopts, NULL)) >= 0) {
        switch (c) {
        case 'r':
            is_realn = 1;
            break;
        case 'e':
            flt_flag |= USE_EQUAL;
            break;
        case 'd':
            flt_flag |= DROP_TAG;
            break;
        case 'q':
            flt_flag |= BIN_QUAL;
            break;
        case 'h':
            flt_flag |= HASH_QNM;
            break;
        case 'N':
            flt_flag &= ~(UPDATE_MD|UPDATE_NM);
            break;
        case 'b':
            is_bam_out = 1;
            break;
        case 'u':
            // Uncompressed output implies BAM output.
            is_uncompressed = is_bam_out = 1;
            break;
        case 'S':
            // Accepted but has no effect.
            break;
        case 'n':
            max_nm = atoi(optarg);
            break;
        case 'C':
            capQ = atoi(optarg);
            break;
        case 'A':
            baq_flag |= 1;
            break;
        case 'E':
            baq_flag |= 2;
            break;
        default:
            // Anything else may still be one of the shared global options.
            if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
            fprintf(stderr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
        /* else fall-through */
        case '?':
            return calmd_usage();
        }
    }
    // Build the output mode string: "w" + "b" (BAM) or "h", then "0" for
    // uncompressed output.
    if (is_bam_out) strcat(mode_w, "b");
    else strcat(mode_w, "h");
    if (is_uncompressed) strcat(mode_w, "0");
    // One positional argument is required, two when no global reference
    // was supplied.
    if (optind + (ga.reference == NULL) >= argc)
        return calmd_usage();
    fp = sam_open_format(argv[optind], "r", &ga.in);
    if (fp == NULL) {
        print_error_errno("calmd", "Failed to open input file '%s'", argv[optind]);
        return 1;
    }

    header = sam_hdr_read(fp);
    if (header == NULL || header->n_targets == 0) {
        fprintf(stderr, "[bam_fillmd] input SAM does not have header. Abort!\n");
        goto fail;
    }

    fpout = sam_open_format("-", mode_w, &ga.out);
    if (fpout == NULL) {
        print_error_errno("calmd", "Failed to open output");
        goto fail;
    }
    if (sam_hdr_write(fpout, header) < 0) {
        print_error_errno("calmd", "Failed to write sam header");
        goto fail;
    }

    // A positional reference argument takes precedence over ga.reference.
    ref_file = argc > optind + 1 ? argv[optind+1] : ga.reference;
    fai = fai_load(ref_file);

    if (!fai) {
        print_error_errno("calmd", "Failed to open reference file '%s'", ref_file);
        goto fail;
    }

    b = bam_init1();
    if (!b) {
        fprintf(stderr, "[bam_fillmd] Failed to allocate bam struct\n");
        goto fail;
    }
    while ((ret = sam_read1(fp, header, b)) >= 0) {
        if (b->core.tid >= 0) {
            // Fetch the reference sequence only when the target changes;
            // tid starts at -2 so the first mapped record always fetches.
            if (tid != b->core.tid) {
                free(ref);
                ref = fai_fetch(fai, header->target_name[b->core.tid], &len);
                tid = b->core.tid;
                if (ref == 0) { // FIXME: Should this always be fatal?
                    fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
                            header->target_name[tid]);
                    if (is_realn || capQ > 10) goto fail; // Would otherwise crash
                }
            }
            if (is_realn) sam_prob_realn(b, ref, len, baq_flag);
            if (capQ > 10) {
                // Only ever lowers the mapping quality, never raises it.
                int q = sam_cap_mapq(b, ref, len, capQ);
                if (b->core.qual > q) b->core.qual = q;
            }
            if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm);
        }
        // Every record is written, including those left unmodified.
        if (sam_write1(fpout, header, b) < 0) {
            print_error_errno("calmd", "failed to write to output file");
            goto fail;
        }
    }
    // -1 ends the loop normally; anything lower is treated as a read error.
    if (ret < -1) {
        fprintf(stderr, "[bam_fillmd] Error reading input.\n");
        goto fail;
    }
    bam_destroy1(b);
    bam_hdr_destroy(header);

    free(ref);
    fai_destroy(fai);
    sam_close(fp);
    if (sam_close(fpout) < 0) {
        fprintf(stderr, "[bam_fillmd] error when closing output file\n");
        return 1;
    }
    return 0;

    // Shared cleanup for every failure after the input has been opened.
fail:
    free(ref);
    if (b) bam_destroy1(b);
    if (header) bam_hdr_destroy(header);
    if (fai) fai_destroy(fai);
    if (fp) sam_close(fp);
    if (fpout) sam_close(fpout);
    return 1;
}
Example #8
0
/*
 * Build the run-time state from the parsed options.
 *
 * Allocates the state, opens the input and output files, duplicates the
 * input header for output, appends the new @RG line when opts->rg_line is
 * set (refusing to clash with an existing ID), works out which read-group
 * ID to apply, and selects the per-record handler from opts->mode.
 *
 * opts      - parsed command-line options.
 * state_out - receives the newly allocated state as soon as it exists, so
 *             it is also set when a later step fails and false is returned.
 *
 * Returns true on success, false on failure (a message has been printed).
 */
static bool init(const parsed_opts_t* opts, state_t** state_out) {
    state_t* retval = calloc(1, sizeof *retval);
    if (retval == NULL) {
        fprintf(stderr, "[init] Out of memory allocating state struct.\n");
        return false;
    }
    *state_out = retval;

    // Open files
    retval->input_file = sam_open_format(opts->input_name, "r", &opts->ga.in);
    if (retval->input_file == NULL) {
        fprintf(stderr, "[init] Could not open input file: %s\n", opts->input_name);
        return false;
    }
    retval->input_header = sam_hdr_read(retval->input_file);
    if (retval->input_header == NULL) {
        fprintf(stderr, "[init] Could not read header for file: %s\n", opts->input_name);
        return false;
    }

    retval->output_header = bam_hdr_dup(retval->input_header);
    if (retval->output_header == NULL) {
        fprintf(stderr, "[init] Out of memory duplicating header.\n");
        return false;
    }

    // A NULL output name means standard output; use the resolved name in
    // the error message too, so "%s" never receives a NULL pointer.
    const char *output_name = opts->output_name == NULL ? "-" : opts->output_name;
    retval->output_file = sam_open_format(output_name, "w", &opts->ga.out);

    if (retval->output_file == NULL) {
        print_error_errno("addreplacerg", "Could not open output file: %s", output_name);
        return false;
    }

    if (opts->rg_line) {
        // Append new RG line to header.
        // Check does not already exist
        if ( confirm_rg(retval->output_header, opts->rg_id) ) {
            fprintf(stderr, "[init] ID of new RG line specified conflicts with that of an existing header RG line. Overwrite not yet implemented.\n");
            return false;
        }
        retval->rg_id = strdup(opts->rg_id);
        if (retval->rg_id == NULL) {
            fprintf(stderr, "[init] Out of memory copying RG ID.\n");
            return false;
        }
        // Existing text + new line + '\n' + terminating NUL.
        size_t new_len = strlen( retval->output_header->text ) + strlen( opts->rg_line ) + 2;
        char* new_header = malloc(new_len);
        if (!new_header) {
            fprintf(stderr, "[init] Out of memory whilst writing new header.\n");
            return false;
        }
        sprintf(new_header,"%s%s\n", retval->output_header->text, opts->rg_line);
        free(retval->output_header->text);
        retval->output_header->text = new_header;
        retval->output_header->l_text = (int)new_len - 1;
    } else {
        if (opts->rg_id) {
            // Confirm what has been supplied exists
            if ( !confirm_rg(retval->output_header, opts->rg_id) ) {
                fprintf(stderr, "RG ID supplied does not exist in header. Supply full @RG line with -r instead?\n");
                return false;
            }
            retval->rg_id = strdup(opts->rg_id);
            if (retval->rg_id == NULL) {
                fprintf(stderr, "[init] Out of memory copying RG ID.\n");
                return false;
            }
        } else {
            if ((retval->rg_id = get_first_rgid(retval->output_header)) == NULL ) {
                fprintf(stderr, "No RG specified on command line or in existing header.\n");
                return false;
            }
        }
    }

    switch (opts->mode) {
        case overwrite_all:
            retval->mode_func = &overwrite_all_func;
            break;
        case orphan_only:
            retval->mode_func = &orphan_only_func;
            break;
    }

    return true;
}
Example #9
0
/*
 * Concatenate BAM files at the BGZF block level.
 *
 * nfn, fn - number of input files and their names ("-" reads stdin).
 * h       - header to write to the output; when NULL the header of the
 *           first input file is used instead.
 * outbam  - output file name ("-" writes to pysam_stdout).
 *
 * For each input the header is read, any uncompressed data left in the
 * current block is re-written, and the remaining compressed blocks are
 * copied raw.  The last BGZF_EMPTY_BLOCK_SIZE bytes of each input are held
 * back in ebuf so that a trailing empty block can be recognised and dropped;
 * if the tail is not an empty block a warning is printed and the bytes are
 * written out after all.
 *
 * Returns 0 on success, -1 on failure.  All files opened here, the copy
 * buffer and the per-file header are released on every path.
 */
int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
{
    BGZF *fp, *in = NULL;
    bam_hdr_t *old = NULL;
    uint8_t *buf = NULL;
    uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
    const int es=BGZF_EMPTY_BLOCK_SIZE;
    int i;

    fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(pysam_stdout), "w");
    if (fp == 0) {
        print_error_errno("cat", "fail to open output file '%s'", outbam);
        return -1;
    }
    if (h) {
        if (bam_hdr_write(fp, h) < 0) {
            print_error_errno("cat", "Couldn't write header");
            goto fail;
        }
    }

    buf = (uint8_t*) malloc(BUF_SIZE);
    if (!buf) {
        fprintf(pysam_stderr, "[%s] Couldn't allocate buffer\n", __func__);
        goto fail;
    }
    for(i = 0; i < nfn; ++i){
        int len,j;

        in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r");
        if (in == 0) {
            print_error_errno("cat", "fail to open file '%s'", fn[i]);
            goto fail;
        }
        /* Go through the common cleanup rather than returning directly,
         * which would leak fp, in and buf. */
        if (in->is_write) goto fail;

        old = bam_hdr_read(in);
        if (old == NULL) {
            fprintf(pysam_stderr, "[%s] ERROR: couldn't read header for '%s'.\n",
                    __func__, fn[i]);
            goto fail;
        }
        if (h == 0 && i == 0) {
            if (bam_hdr_write(fp, old) < 0) {
                print_error_errno("cat", "Couldn't write header");
                goto fail;
            }
        }

        /* Data already decompressed while reading the header must be
         * re-compressed into the output. */
        if (in->block_offset < in->block_length) {
            if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
            if (bgzf_flush(fp) != 0) goto write_fail;
        }

        /* j becomes 1 once ebuf holds the last `es` bytes seen so far. */
        j=0;
        while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
            if(len<es){
                int diff=es-len;
                if(j==0) {
                    fprintf(pysam_stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
                    goto fail;
                }
                if (bgzf_raw_write(fp, ebuf, len) < 0) goto write_fail;

                memcpy(ebuf,ebuf+len,diff);
                memcpy(ebuf+diff,buf,len);
            } else {
                if(j!=0) {
                    if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
                }
                len-= es;
                memcpy(ebuf,buf+len,es);
                if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
            }
            j=1;
        }
        /* A negative length is a read error, not end of file. */
        if (len < 0) {
            fprintf(pysam_stderr, "[%s] Error reading from '%s'.\n", __func__, fn[i]);
            goto fail;
        }

        /* check final gzip block; skipped when no raw bytes were read,
         * because ebuf would then be uninitialised */
        if (j != 0) {
            const uint8_t gzip1=ebuf[0];
            const uint8_t gzip2=ebuf[1];
            uint32_t isize;
            /* memcpy avoids a misaligned / aliasing-unsafe load */
            memcpy(&isize, ebuf+es-4, sizeof isize);
            if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
                fprintf(pysam_stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
                fprintf(pysam_stderr, " Possible output corruption.\n");
                if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
            }
        }
        bam_hdr_destroy(old);
        old = NULL;
        bgzf_close(in);
        in = NULL;
    }
    free(buf);
    if (bgzf_close(fp) < 0) {
        fprintf(pysam_stderr, "[%s] Error on closing '%s'.\n", __func__, outbam);
        return -1;
    }
    return 0;

 write_fail:
    fprintf(pysam_stderr, "[%s] Error writing to '%s'.\n", __func__, outbam);
 fail:
    if (old) bam_hdr_destroy(old);
    if (in) bgzf_close(in);
    if (fp) bgzf_close(fp);
    free(buf);
    return -1;
}
Example #10
0
int main_depth(int argc, char *argv[])
{
    int i, n, tid, reg_tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0;
    int all = 0, status = EXIT_SUCCESS, nfiles, max_depth = -1;
    const bam_pileup1_t **plp;
    char *reg = 0; // specified region
    void *bed = 0; // BED data structure
    char *file_list = NULL, **fn = NULL;
    bam_hdr_t *h = NULL; // BAM header of the 1st input
    aux_t **data;
    bam_mplp_t mplp;
    int last_pos = -1, last_tid = -1, ret;

    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0),
        { NULL, 0, NULL, 0 }
    };

    // parse the command line
    while ((n = getopt_long(argc, argv, "r:b:q:Q:l:f:am:d:", lopts, NULL)) >= 0) {
        switch (n) {
        case 'l':
            min_len = atoi(optarg);
            break; // minimum query length
        case 'r':
            reg = strdup(optarg);
            break;   // parsing a region requires a BAM header
        case 'b':
            bed = bed_read(optarg); // BED or position list file can be parsed now
            if (!bed) {
                print_error_errno("depth", "Could not read file \"%s\"", optarg);
                return 1;
            }
            break;
        case 'q':
            baseQ = atoi(optarg);
            break;   // base quality threshold
        case 'Q':
            mapQ = atoi(optarg);
            break;    // mapping quality threshold
        case 'f':
            file_list = optarg;
            break;
        case 'a':
            all++;
            break;
        case 'd':
        case 'm':
            max_depth = atoi(optarg);
            break; // maximum coverage depth
        default:
            if (parse_sam_global_opt(n, optarg, lopts, &ga) == 0) break;
        /* else fall-through */
        case '?':
            return usage();
        }
    }
    if (optind == argc && !file_list)
        return usage();

    // initialize the auxiliary data structures
    if (file_list)
    {
        if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
        n = nfiles;
        argv = fn;
        optind = 0;
    }
    else
        n = argc - optind; // the number of BAMs on the command line
    data = calloc(n, sizeof(aux_t*)); // data[i] for the i-th input
    reg_tid = 0;
    beg = 0;
    end = INT_MAX;  // set the default region
    for (i = 0; i < n; ++i) {
        int rf;
        data[i] = calloc(1, sizeof(aux_t));
        data[i]->fp = sam_open_format(argv[optind+i], "r", &ga.in); // open BAM
        if (data[i]->fp == NULL) {
            print_error_errno("depth", "Could not open \"%s\"", argv[optind+i]);
            status = EXIT_FAILURE;
            goto depth_end;
        }
        rf = SAM_FLAG | SAM_RNAME | SAM_POS | SAM_MAPQ | SAM_CIGAR | SAM_SEQ;
        if (baseQ) rf |= SAM_QUAL;
        if (hts_set_opt(data[i]->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
            fprintf(stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
            return 1;
        }
        if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
            fprintf(stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
            return 1;
        }
        data[i]->min_mapQ = mapQ;                    // set the mapQ filter
        data[i]->min_len  = min_len;                 // set the qlen filter
        data[i]->hdr = sam_hdr_read(data[i]->fp);    // read the BAM header
        if (data[i]->hdr == NULL) {
            fprintf(stderr, "Couldn't read header for \"%s\"\n",
                    argv[optind+i]);
            status = EXIT_FAILURE;
            goto depth_end;
        }
        if (reg) { // if a region is specified
            hts_idx_t *idx = sam_index_load(data[i]->fp, argv[optind+i]);  // load the index
            if (idx == NULL) {
                print_error("depth", "can't load index for \"%s\"", argv[optind+i]);
                status = EXIT_FAILURE;
                goto depth_end;
            }
            data[i]->iter = sam_itr_querys(idx, data[i]->hdr, reg); // set the iterator
            hts_idx_destroy(idx); // the index is not needed any more; free the memory
            if (data[i]->iter == NULL) {
                print_error("depth", "can't parse region \"%s\"", reg);
                status = EXIT_FAILURE;
                goto depth_end;
            }
        }
    }

    h = data[0]->hdr; // easy access to the header of the 1st BAM
    if (reg) {
        beg = data[0]->iter->beg; // and to the parsed region coordinates
        end = data[0]->iter->end;
        reg_tid = data[0]->iter->tid;
    }

    // the core multi-pileup loop
    mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization
    if (0 < max_depth)
        bam_mplp_set_maxcnt(mplp,max_depth);  // set maximum coverage depth
    n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM
    plp = calloc(n, sizeof(bam_pileup1_t*)); // plp[i] points to the array of covering reads (internal in mplp)
    while ((ret=bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // come to the next covered position
        if (pos < beg || pos >= end) continue; // out of range; skip
        if (tid >= h->n_targets) continue;     // diff number of @SQ lines per file?
        if (all) {
            while (tid > last_tid) {
                if (last_tid >= 0 && !reg) {
                    // Deal with remainder or entirety of last tid.
                    while (++last_pos < h->target_len[last_tid]) {
                        // Horribly inefficient, but the bed API is an obfuscated black box.
                        if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
                            continue;
                        fputs(h->target_name[last_tid], stdout);
                        printf("\t%d", last_pos+1);
                        for (i = 0; i < n; i++)
                            putchar('\t'), putchar('0');
                        putchar('\n');
                    }
                }
                last_tid++;
                last_pos = -1;
                if (all < 2)
                    break;
            }

            // Deal with missing portion of current tid
            while (++last_pos < pos) {
                if (last_pos < beg) continue; // out of range; skip
                if (bed && bed_overlap(bed, h->target_name[tid], last_pos, last_pos + 1) == 0)
                    continue;
                fputs(h->target_name[tid], stdout);
                printf("\t%d", last_pos+1);
                for (i = 0; i < n; i++)
                    putchar('\t'), putchar('0');
                putchar('\n');
            }

            last_tid = tid;
            last_pos = pos;
        }
        if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue;
        fputs(h->target_name[tid], stdout);
        printf("\t%d", pos+1); // a customized printf() would be faster
        for (i = 0; i < n; ++i) { // base level filters have to go here
            int j, m = 0;
            for (j = 0; j < n_plp[i]; ++j) {
                const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know
                if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos
                else if (bam_get_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality
            }
            printf("\t%d", n_plp[i] - m); // this the depth to output
        }
        putchar('\n');
    }
    if (ret < 0) status = EXIT_FAILURE;
    free(n_plp);
    free(plp);
    bam_mplp_destroy(mplp);

    if (all) {
        // Handle terminating region
        if (last_tid < 0 && reg && all > 1) {
            last_tid = reg_tid;
            last_pos = beg-1;
        }
        while (last_tid >= 0 && last_tid < h->n_targets) {
            while (++last_pos < h->target_len[last_tid]) {
                if (last_pos >= end) break;
                if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
                    continue;
                fputs(h->target_name[last_tid], stdout);
                printf("\t%d", last_pos+1);
                for (i = 0; i < n; i++)
                    putchar('\t'), putchar('0');
                putchar('\n');
            }
            last_tid++;
            last_pos = -1;
            if (all < 2 || reg)
                break;
        }
    }

depth_end:
    for (i = 0; i < n && data[i]; ++i) {
        bam_hdr_destroy(data[i]->hdr);
        if (data[i]->fp) sam_close(data[i]->fp);
        hts_itr_destroy(data[i]->iter);
        free(data[i]);
    }
    free(data);
    free(reg);
    if (bed) bed_destroy(bed);
    if ( file_list )
    {
        for (i=0; i<n; i++) free(fn[i]);
        free(fn);
    }
    sam_global_args_free(&ga);
    return status;
}
Example #11
0
int main_samview(int argc, char *argv[])
{
 int index;
    for(index = 0; index < argc; index++) {
        printf("The %d is %s\n",index,argv[index]);
    }
    getchar();return 0;
    int c, is_header = 0, is_header_only = 0, ret = 0, compress_level = -1, is_count = 0;
    int is_long_help = 0, n_threads = 0;
    int64_t count = 0;
    samFile *in = 0, *out = 0, *un_out=0;
    bam_hdr_t *header = NULL;
    char out_mode[5], out_un_mode[5], *out_format = "";
    char *fn_in = 0, *fn_out = 0, *fn_list = 0, *q, *fn_un_out = 0;
    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;

    samview_settings_t settings = {
        .rghash = NULL,
        .min_mapQ = 0,
        .flag_on = 0,
        .flag_off = 0,
        .min_qlen = 0,
        .remove_B = 0,
        .subsam_seed = 0,
        .subsam_frac = -1.,
        .library = NULL,
        .bed = NULL,
    };

    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T'),
        { "threads", required_argument, NULL, '@' },
        { NULL, 0, NULL, 0 }
    };

    /* parse command-line options */
    strcpy(out_mode, "w");
    strcpy(out_un_mode, "w");
    while ((c = getopt_long(argc, argv,
                            "SbBcCt:h1Ho:O:q:f:F:ul:r:?T:R:L:s:@:m:x:U:",
                            lopts, NULL)) >= 0) {
        switch (c) {
        case 's':
            if ((settings.subsam_seed = strtol(optarg, &q, 10)) != 0) {
                srand(settings.subsam_seed);
                settings.subsam_seed = rand();
            }
            settings.subsam_frac = strtod(q, &q);
            break;
        case 'm': settings.min_qlen = atoi(optarg); break;
        case 'c': is_count = 1; break;
        case 'S': break;
        case 'b': out_format = "b"; break;
        case 'C': out_format = "c"; break;
        case 't': fn_list = strdup(optarg); break;
        case 'h': is_header = 1; break;
        case 'H': is_header_only = 1; break;
        case 'o': fn_out = strdup(optarg); break;
        case 'U': fn_un_out = strdup(optarg); break;
        case 'f': settings.flag_on |= strtol(optarg, 0, 0); break;
        case 'F': settings.flag_off |= strtol(optarg, 0, 0); break;
        case 'q': settings.min_mapQ = atoi(optarg); break;
        case 'u': compress_level = 0; break;
        case '1': compress_level = 1; break;
        case 'l': settings.library = strdup(optarg); break;
        case 'L':
            if ((settings.bed = bed_read(optarg)) == NULL) {
                print_error_errno("view", "Could not read file \"%s\"", optarg);
                ret = 1;
                goto view_end;
            }
            break;
        case 'r':
            if (add_read_group_single("view", &settings, optarg) != 0) {
                ret = 1;
                goto view_end;
            }
            break;
        case 'R':
            if (add_read_groups_file("view", &settings, optarg) != 0) {
                ret = 1;
                goto view_end;
            }
            break;
                /* REMOVED as htslib doesn't support this
        //case 'x': out_format = "x"; break;
        //case 'X': out_format = "X"; break;
                 */
        case '?': is_long_help = 1; break;
        case 'B': settings.remove_B = 1; break;
        case '@': n_threads = strtol(optarg, 0, 0); break;
        case 'x':
            {
                if (strlen(optarg) != 2) {
                    fprintf(stderr, "main_samview: Error parsing -x auxiliary tags should be exactly two characters long.\n");
                    return usage(stderr, EXIT_FAILURE, is_long_help);
                }
                settings.remove_aux = (char**)realloc(settings.remove_aux, sizeof(char*) * (++settings.remove_aux_len));
                settings.remove_aux[settings.remove_aux_len-1] = optarg;
            }
            break;

        default:
            if (parse_sam_global_opt(c, optarg, lopts, &ga) != 0)
                return usage(stderr, EXIT_FAILURE, is_long_help);
            break;
        }
    }
    if (compress_level >= 0 && !*out_format) out_format = "b";
    if (is_header_only) is_header = 1;
    // File format auto-detection first
    if (fn_out)    sam_open_mode(out_mode+1,    fn_out,    NULL);
    if (fn_un_out) sam_open_mode(out_un_mode+1, fn_un_out, NULL);
    // Overridden by manual -b, -C
    if (*out_format)
        out_mode[1] = out_un_mode[1] = *out_format;
    out_mode[2] = out_un_mode[2] = '\0';
    // out_(un_)mode now 1 or 2 bytes long, followed by nul.
    if (compress_level >= 0) {
        char tmp[2];
        tmp[0] = compress_level + '0'; tmp[1] = '\0';
        strcat(out_mode, tmp);
        strcat(out_un_mode, tmp);
    }
    if (argc == optind && isatty(STDIN_FILENO)) return usage(stdout, EXIT_SUCCESS, is_long_help); // potential memory leak...

    fn_in = (optind < argc)? argv[optind] : "-";
    // generate the fn_list if necessary
    if (fn_list == 0 && ga.reference) fn_list = samfaipath(ga.reference);
    // open file handlers
    if ((in = sam_open_format(fn_in, "r", &ga.in)) == 0) {
        print_error_errno("view", "failed to open \"%s\" for reading", fn_in);
        ret = 1;
        goto view_end;
    }

    if (fn_list) {
        if (hts_set_fai_filename(in, fn_list) != 0) {
            fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
            ret = 1;
            goto view_end;
        }
    }
    if ((header = sam_hdr_read(in)) == 0) {
        fprintf(stderr, "[main_samview] fail to read the header from \"%s\".\n", fn_in);
        ret = 1;
        goto view_end;
    }
    if (settings.rghash) { // FIXME: I do not know what "bam_header_t::n_text" is for...
        char *tmp;
        int l;
        tmp = drop_rg(header->text, settings.rghash, &l);
        free(header->text);
        header->text = tmp;
        header->l_text = l;
    }
    if (!is_count) {
        if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) {
            print_error_errno("view", "failed to open \"%s\" for writing", fn_out? fn_out : "standard output");
            ret = 1;
            goto view_end;
        }
        if (fn_list) {
            if (hts_set_fai_filename(out, fn_list) != 0) {
                fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
                ret = 1;
                goto view_end;
            }
        }
        if (*out_format || is_header ||
            out_mode[1] == 'b' || out_mode[1] == 'c' ||
            (ga.out.format != sam && ga.out.format != unknown_format))  {
            if (sam_hdr_write(out, header) != 0) {
                fprintf(stderr, "[main_samview] failed to write the SAM header\n");
                ret = 1;
                goto view_end;
            }
        }
        if (fn_un_out) {
            if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
                print_error_errno("view", "failed to open \"%s\" for writing", fn_un_out);
                ret = 1;
                goto view_end;
            }
            if (fn_list) {
                if (hts_set_fai_filename(un_out, fn_list) != 0) {
                    fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
                    ret = 1;
                    goto view_end;
                }
            }
            if (*out_format || is_header ||
                out_un_mode[1] == 'b' || out_un_mode[1] == 'c' ||
                (ga.out.format != sam && ga.out.format != unknown_format))  {
                if (sam_hdr_write(un_out, header) != 0) {
                    fprintf(stderr, "[main_samview] failed to write the SAM header\n");
                    ret = 1;
                    goto view_end;
                }
            }
        }
    }

    if (n_threads > 1) { if (out) hts_set_threads(out, n_threads); }
    if (is_header_only) goto view_end; // no need to print alignments

    if (optind + 1 >= argc) { // convert/print the entire file
        bam1_t *b = bam_init1();
        int r;
        while ((r = sam_read1(in, header, b)) >= 0) { // read one alignment from `in'
            if (!process_aln(header, b, &settings)) {
                if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
                count++;
            } else {
                if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
            }
        }
        if (r < -1) {
            fprintf(stderr, "[main_samview] truncated file.\n");
            ret = 1;
        }
        bam_destroy1(b);
    } else { // retrieve alignments in specified regions
        int i;
        bam1_t *b;
        hts_idx_t *idx = sam_index_load(in, fn_in); // load index
        if (idx == 0) { // index is unavailable
            fprintf(stderr, "[main_samview] random alignment retrieval only works for indexed BAM or CRAM files.\n");
            ret = 1;
            goto view_end;
        }
        b = bam_init1();
        for (i = optind + 1; i < argc; ++i) {
            int result;
            hts_itr_t *iter = sam_itr_querys(idx, header, argv[i]); // parse a region in the format like `chr2:100-200'
            if (iter == NULL) { // region invalid or reference name not found
                int beg, end;
                if (hts_parse_reg(argv[i], &beg, &end))
                    fprintf(stderr, "[main_samview] region \"%s\" specifies an unknown reference name. Continue anyway.\n", argv[i]);
                else
                    fprintf(stderr, "[main_samview] region \"%s\" could not be parsed. Continue anyway.\n", argv[i]);
                continue;
            }
            // fetch alignments
            while ((result = sam_itr_next(in, iter, b)) >= 0) {
                if (!process_aln(header, b, &settings)) {
                    if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
                    count++;
                } else {
                    if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
                }
            }
            hts_itr_destroy(iter);
            if (result < -1) {
                fprintf(stderr, "[main_samview] retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file\n", argv[i]);
                ret = 1;
                break;
            }
        }
        bam_destroy1(b);
        hts_idx_destroy(idx); // destroy the BAM index
    }

view_end:
    if (is_count && ret == 0)
        printf("%" PRId64 "\n", count);

    // close files, free and return
    if (in) check_sam_close("view", in, fn_in, "standard input", &ret);
    if (out) check_sam_close("view", out, fn_out, "standard output", &ret);
    if (un_out) check_sam_close("view", un_out, fn_un_out, "file", &ret);

    free(fn_list); free(fn_out); free(settings.library);  free(fn_un_out);
    sam_global_args_free(&ga);
    if ( header ) bam_hdr_destroy(header);
    if (settings.bed) bed_destroy(settings.bed);
    if (settings.rghash) {
        khint_t k;
        for (k = 0; k < kh_end(settings.rghash); ++k)
            if (kh_exist(settings.rghash, k)) free((char*)kh_key(settings.rghash, k));
        kh_destroy(rg, settings.rghash);
    }
    if (settings.remove_aux_len) {
        free(settings.remove_aux);
    }
    return ret;
}

/*
 * Write the `samtools view` help text to fp and return exit_status
 * unchanged, so a caller can simply `return usage(...)`.
 *
 * The option summary and the global-option help are always printed; the
 * numbered notes are printed only when is_long_help is non-zero.
 */
static int usage(FILE *fp, int exit_status, int is_long_help)
{
    // Option summary, shown for both the short and the long help.
    static const char options_text[] =
"\n"
"Usage: samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]\n"
"\n"
"Options:\n"
// output options
"  -b       output BAM\n"
"  -C       output CRAM (requires -T)\n"
"  -1       use fast BAM compression (implies -b)\n"
"  -u       uncompressed BAM output (implies -b)\n"
"  -h       include header in SAM output\n"
"  -H       print SAM header only (no alignments)\n"
"  -c       print only the count of matching records\n"
"  -o FILE  output file name [stdout]\n"
"  -U FILE  output reads not selected by filters to FILE [null]\n"
// extra input
"  -t FILE  FILE listing reference names and lengths (see long help) [null]\n"
// read filters
"  -L FILE  only include reads overlapping this BED FILE [null]\n"
"  -r STR   only include reads in read group STR [null]\n"
"  -R FILE  only include reads with read group listed in FILE [null]\n"
"  -q INT   only include reads with mapping quality >= INT [0]\n"
"  -l STR   only include reads in library STR [null]\n"
"  -m INT   only include reads with number of CIGAR operations consuming\n"
"           query sequence >= INT [0]\n"
"  -f INT   only include reads with all bits set in INT set in FLAG [0]\n"
"  -F INT   only include reads with none of the bits set in INT set in FLAG [0]\n"
// read processing
"  -x STR   read tag to strip (repeatable) [null]\n"
"  -B       collapse the backward CIGAR operation\n"
"  -s FLOAT integer part sets seed of random number generator [0];\n"
"           rest sets fraction of templates to subsample [no subsampling]\n"
// general options
"  -@, --threads INT\n"
"           number of BAM/CRAM compression threads [0]\n"
"  -?       print long help, including note about region specification\n"
"  -S       ignored (input format is auto-detected)\n";

    // Extended notes, shown only for the long help.
    static const char notes_text[] =
"Notes:\n"
"\n"
"1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
"   Further control over the CRAM format can be specified by using the\n"
"   --output-fmt-option, e.g. to specify the number of sequences per slice\n"
"   and to use avoid reference based compression:\n"
"\n"
"\tsamtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
"\t   --output-fmt-option no_ref -o out.cram in.bam\n"
"\n"
"   Options can also be specified as a comma separated list within the\n"
"   --output-fmt value too.  For example this is equivalent to the above\n"
"\n"
"\tsamtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
"\t   -o out.cram in.bam\n"
"\n"
"2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
"   two fields of each line consisting of the reference name and the\n"
"   corresponding sequence length. The `.fai' file generated by \n"
"   `samtools faidx' is suitable for use as this file. This may be an\n"
"   empty file if reads are unaligned.\n"
"\n"
"3. SAM->BAM conversion:  samtools view -bT ref.fa in.sam.gz\n"
"\n"
"4. BAM->SAM conversion:  samtools view -h in.bam\n"
"\n"
"5. A region should be presented in one of the following formats:\n"
"   `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
"   specified, the input alignment file must be a sorted and indexed\n"
"   alignment (BAM/CRAM) file.\n"
"\n"
"6. Option `-u' is preferred over `-b' when the output is piped to\n"
"   another samtools command.\n"
"\n";

    fputs(options_text, fp);

    sam_global_opt_help(fp, "-.O.T");
    fputs("\n", fp);

    if (!is_long_help)
        return exit_status;

    fputs(notes_text, fp);
    return exit_status;
}
Example #12
0
/*
 * Allocate a bam2fq_state_t, copy the relevant settings from opts, open
 * the input file and every requested output file, and read the input
 * header.
 *
 * On success the new state is stored in *state_out and true is returned.
 * On failure a message is printed, everything opened so far is closed,
 * the state is freed, *state_out is left untouched and false is returned.
 */
static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
{
    int i;
    uint32_t rf;
    bam2fq_state_t* state = calloc(1, sizeof(bam2fq_state_t));
    if (state == NULL) {
        print_error_errno("bam2fq", "Cannot allocate memory for state");
        return false;
    }
    state->flag_on = opts->flag_on;
    state->flag_off = opts->flag_off;
    state->has12 = opts->has12;
    state->use_oq = opts->use_oq;
    state->copy_tags = opts->copy_tags;
    state->filetype = opts->filetype;
    state->def_qual = opts->def_qual;

    state->fp = sam_open(opts->fn_input, "r");
    if (state->fp == NULL) {
        print_error_errno("bam2fq","Cannot read file \"%s\"", opts->fn_input);
        goto fail;
    }
    // Only ask the CRAM decoder for the fields that are actually used;
    // aux tags are needed only when original qualities were requested.
    rf = SAM_QNAME | SAM_FLAG | SAM_SEQ | SAM_QUAL;
    if (opts->use_oq) rf |= SAM_AUX;
    if (hts_set_opt(state->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
        fprintf(stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
        goto fail;
    }
    if (hts_set_opt(state->fp, CRAM_OPT_DECODE_MD, 0)) {
        fprintf(stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
        goto fail;
    }
    if (opts->fnse) {
        state->fpse = fopen(opts->fnse,"w");
        if (state->fpse == NULL) {
            print_error_errno("bam2fq", "Cannot write to singleton file \"%s\"", opts->fnse);
            goto fail;
        }
    }

    // Outputs without an explicit file name go to stdout.
    for (i = 0; i < 3; ++i) {
        if (opts->fnr[i]) {
            state->fpr[i] = fopen(opts->fnr[i], "w");
            if (state->fpr[i] == NULL) {
                print_error_errno("bam2fq", "Cannot write to r%d file \"%s\"", i, opts->fnr[i]);
                goto fail;
            }
        } else {
            state->fpr[i] = stdout;
        }
    }

    state->h = sam_hdr_read(state->fp);
    if (state->h == NULL) {
        fprintf(stderr, "Failed to read header for \"%s\"\n", opts->fn_input);
        goto fail;
    }

    *state_out = state;
    return true;

 fail:
    // The state was zero-initialised by calloc, so any handle that was
    // never opened is still NULL here.  stdout is shared and must stay open.
    for (i = 0; i < 3; ++i) {
        if (state->fpr[i] != NULL && state->fpr[i] != stdout)
            fclose(state->fpr[i]);
    }
    if (state->fpse != NULL) fclose(state->fpse);
    if (state->fp != NULL) sam_close(state->fp);
    free(state);
    return false;
}
Example #13
0
// currently, this function ONLY works if each read has one hit
/*
 * Read every alignment from `in`, fix up the mate-related fields of
 * consecutive records that share a query name, and write the result to
 * `out`.
 *
 *   remove_reads       non-zero: drop secondary alignments and do not write
 *                      reads flagged as unmapped
 *   proper_pair_check  non-zero: clear BAM_FPROPER_PAIR on pairs for which
 *                      plausibly_properly_paired() returns false
 *   add_ct             non-zero: call bam_template_cigar() on each pair
 *   do_mate_scoring    non-zero: call add_mate_score() in both directions
 *
 * Input whose @HD line declares SO:coordinate is rejected.
 * Returns 0 on success and 1 on any failure.
 */
static int bam_mating_core(samFile *in, samFile *out, int remove_reads, int proper_pair_check, int add_ct, int do_mate_scoring)
{
    bam_hdr_t *header;
    bam1_t *b[2] = { NULL, NULL };
    int curr, has_prev, pre_end = 0, cur_end = 0, result;
    kstring_t str;

    str.l = str.m = 0; str.s = 0;
    header = sam_hdr_read(in);
    if (header == NULL) {
        fprintf(stderr, "[bam_mating_core] ERROR: Couldn't read header\n");
        return 1;
    }
    // Accept unknown, unsorted, or queryname sort order, but error on coordinate sorted.
    if ((header->l_text > 3) && (strncmp(header->text, "@HD", 3) == 0)) {
        char *p, *q;
        p = strstr(header->text, "\tSO:coordinate");
        q = strchr(header->text, '\n');
        // Looking for SO:coordinate within the @HD line only
        // (e.g. must ignore in a @CO comment line later in header)
        if ((p != 0) && (p < q)) {
            fprintf(stderr, "[bam_mating_core] ERROR: Coordinate sorted, require grouped/sorted by queryname.\n");
            goto fail;
        }
    }
    if (sam_hdr_write(out, header) < 0) goto write_fail;

    // Two record buffers used alternately: b[curr] receives the next read,
    // b[1-curr] holds the previous primary read while has_prev is set.
    b[0] = bam_init1();
    b[1] = bam_init1();
    curr = 0; has_prev = 0;
    while ((result = sam_read1(in, header, b[curr])) >= 0) {
        bam1_t *cur = b[curr], *pre = b[1-curr];
        if (cur->core.flag & BAM_FSECONDARY)
        {
            if ( !remove_reads ) {
                if (sam_write1(out, header, cur) < 0) goto write_fail;
            }
            continue; // skip secondary alignments
        }
        if (cur->core.flag & BAM_FSUPPLEMENTARY)
        {
            if (sam_write1(out, header, cur) < 0) goto write_fail;
            continue; // pass supplementary alignments through unchanged (TODO:make them match read they came from)
        }
        if (cur->core.tid < 0 || cur->core.pos < 0) // If unmapped set the flag
        {
            cur->core.flag |= BAM_FUNMAP;
        }
        if ((cur->core.flag&BAM_FUNMAP) == 0) // If mapped calculate end
        {
            cur_end = bam_endpos(cur);

            // Check cur_end isn't past the end of the contig we're on, if it is set the UNMAP'd flag
            if (cur_end > (int)header->target_len[cur->core.tid]) cur->core.flag |= BAM_FUNMAP;
        }
        if (has_prev) { // do we have a pair of reads to examine?
            if (strcmp(bam_get_qname(cur), bam_get_qname(pre)) == 0) { // identical pair name
                pre->core.flag |= BAM_FPAIRED;
                cur->core.flag |= BAM_FPAIRED;
                if (sync_mate(pre, cur)) goto fail;

                if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
                    && !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // if safe set TLEN/ISIZE
                {
                    uint32_t cur5, pre5;
                    cur5 = (cur->core.flag&BAM_FREVERSE)? cur_end : cur->core.pos;
                    pre5 = (pre->core.flag&BAM_FREVERSE)? pre_end : pre->core.pos;
                    cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5;
                } else cur->core.isize = pre->core.isize = 0;
                if (add_ct) bam_template_cigar(pre, cur, &str);
                // TODO: Add code to properly check if read is in a proper pair based on ISIZE distribution
                if (proper_pair_check && !plausibly_properly_paired(pre,cur)) {
                    pre->core.flag &= ~BAM_FPROPER_PAIR;
                    cur->core.flag &= ~BAM_FPROPER_PAIR;
                }

                if (do_mate_scoring) {
                    if ((add_mate_score(pre, cur) == -1) || (add_mate_score(cur, pre) == -1)) {
                        fprintf(stderr, "[bam_mating_core] ERROR: unable to add mate score.\n");
                        goto fail;
                    }
                }

                // Write out result
                if ( !remove_reads ) {
                    if (sam_write1(out, header, pre) < 0) goto write_fail;
                    if (sam_write1(out, header, cur) < 0) goto write_fail;
                } else {
                    // If we have to remove reads make sure we do it in a way that doesn't create orphans with bad flags
                    if(pre->core.flag&BAM_FUNMAP) cur->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
                    if(cur->core.flag&BAM_FUNMAP) pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
                    if(!(pre->core.flag&BAM_FUNMAP)) {
                        if (sam_write1(out, header, pre) < 0) goto write_fail;
                    }
                    if(!(cur->core.flag&BAM_FUNMAP)) {
                        if (sam_write1(out, header, cur) < 0) goto write_fail;
                    }
                }
                has_prev = 0;
            } else { // unpaired?  clear bad info and write it out
                if (pre->core.tid < 0 || pre->core.pos < 0 || pre->core.flag&BAM_FUNMAP) { // If unmapped
                    pre->core.flag |= BAM_FUNMAP;
                    pre->core.tid = -1;
                    pre->core.pos = -1;
                }
                pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
                pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
                if ( !remove_reads || !(pre->core.flag&BAM_FUNMAP) ) {
                    if (sam_write1(out, header, pre) < 0) goto write_fail;
                }
            }
        } else has_prev = 1;
        curr = 1 - curr;
        pre_end = cur_end;
    }
    // sam_read1() returning less than -1 is a read error, not end of file.
    if (result < -1) {
        fprintf(stderr, "[bam_mating_core] ERROR: Couldn't read from input file\n");
        goto fail;
    }
    if (has_prev && !remove_reads) { // If we still have a BAM in the buffer it must be unpaired
        bam1_t *pre = b[1-curr];
        if (pre->core.tid < 0 || pre->core.pos < 0 || pre->core.flag&BAM_FUNMAP) { // If unmapped
            pre->core.flag |= BAM_FUNMAP;
            pre->core.tid = -1;
            pre->core.pos = -1;
        }
        pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
        pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);

        if (sam_write1(out, header, pre) < 0) goto write_fail;
    }
    bam_hdr_destroy(header);
    bam_destroy1(b[0]);
    bam_destroy1(b[1]);
    free(str.s);
    return 0;

 write_fail:
    print_error_errno("fixmate", "Couldn't write to output file");
 fail:
    bam_hdr_destroy(header);
    bam_destroy1(b[0]);
    bam_destroy1(b[1]);
    // The scratch string may have been grown by bam_template_cigar().
    free(str.s);
    return 1;
}
Example #14
0
int main_cat(int argc, char *argv[])
{
    bam_hdr_t *h = 0;
    char *outfn = 0;
    int c, ret = 0;
    samFile *in;

    while ((c = getopt(argc, argv, "h:o:")) >= 0) {
        switch (c) {
            case 'h': {
                samFile *fph = sam_open(optarg, "r");
                if (fph == 0) {
                    fprintf(pysam_stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, argv[1]);
                    return 1;
                }
                h = sam_hdr_read(fph);
                if (h == NULL) {
                    fprintf(pysam_stderr,
                            "[%s] ERROR: failed to read the header for '%s'.\n",
                            __func__, argv[1]);
                    return 1;
                }
                sam_close(fph);
                break;
            }
            case 'o': outfn = strdup(optarg); break;
        }
    }
    if (argc - optind < 1) {
        fprintf(pysam_stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> [...]\n");
        return 1;
    }

    in = sam_open(argv[optind], "r");
    if (!in) {
        print_error_errno("cat", "failed to open file '%s'", argv[optind]);
        return 1;
    }

    switch (hts_get_format(in)->format) {
    case bam:
        sam_close(in);
        if (bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
            ret = 1;
        break;

    case cram:
        sam_close(in);
        if (cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
            ret = 1;
        break;

    default:
        sam_close(in);
        fprintf(pysam_stderr, "[%s] ERROR: input is not BAM or CRAM\n", __func__);
        return 1;
    }
    free(outfn);

    if (h)
        bam_hdr_destroy(h);

    return ret;
}
Example #15
0
/*
 * CRAM files don't store the RG:Z:ID per read in the aux field.
 * Instead they have a numerical data series (RG) to point each read
 * back to the Nth @RG line in the file.  This means that we may need
 * to edit the RG data series (if the files were produced from
 * "samtools split" for example).
 *
 * The encoding method is stored in the compression header. Typical
 * examples:
 *
 * RG => EXTERNAL {18}           # Block content-id 18 holds RG values
 *                               # as a series of ITF8 encoded values
 *
 * RG => HUFFMAN {1, 255, 255, 255, 255, 255, 1, 0}
 *                               # One RG value #-1.  (No RG)
 *
 * RG => HUFFMAN {1, 0, 1, 0}    # One RG value #0 (always first RG)
 *
 * RG => HUFFMAN {2, 0, 1, 2, 1, 1}
 *                               # Two RG values, #0 and #1, written
 *                               # to the CORE block and possibly
 *                               # mixed with other data series.
 *
 * A single value can (but may not be) implemented as a zero bit
 * huffman code.  In this situation we can change the meta-data in the
 * compression header to renumber an RG value..
 *
 * cram_cat() copies the containers of the nfn CRAM files named in fn
 * into a new CRAM file outcram.  The output header is whatever
 * cram_cat_check_hdr() builds from the inputs and h.
 *
 * Returns 0 on success and -1 on failure.  Everything acquired inside
 * this function is released on both paths.
 */
int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
{
    samFile *out = NULL;
    cram_fd *out_c;
    int i, vers_maj, vers_min;
    int ret = -1;
    khash_s2i *rg2id = NULL;
    bam_hdr_t *new_h = NULL;
    char vers[100];

    // Per-input resources live at function scope so the single cleanup
    // path below can release whatever is still open.
    samFile *in = NULL;
    bam_hdr_t *old = NULL;
    cram_container *c = NULL;

    /* Check consistent versioning and compatible headers */
    if (!(new_h = cram_cat_check_hdr(nfn, fn, h, &rg2id, &vers_maj, &vers_min)))
        return -1;

    /* Open the file with cram_vers */
    snprintf(vers, sizeof vers, "%d.%d", vers_maj, vers_min);
    out = sam_open(outcram, "wc");
    if (out == 0) {
        print_error_errno("cat", "fail to open output file '%s'", outcram);
        goto done;
    }
    out_c = out->fp.cram;
    cram_set_option(out_c, CRAM_OPT_VERSION, vers);
    //fprintf(pysam_stderr, "Creating cram vers %s\n", vers);

    cram_fd_set_header(out_c, sam_hdr_parse_(new_h->text,  new_h->l_text)); // needed?
    if (sam_hdr_write(out, new_h) < 0) {
        print_error_errno("cat", "Couldn't write header");
        goto done;
    }

    for (i = 0; i < nfn; ++i) {
        cram_fd *in_c;
        khash_s2i *rg2id_in;
        int new_rg = -1;

        in = sam_open(fn[i], "rc");
        if (in == 0) {
            print_error_errno("cat", "fail to open file '%s'", fn[i]);
            goto done;
        }
        in_c = in->fp.cram;

        old = sam_hdr_read(in);
        if (old == NULL) {
            print_error_errno("cat", "fail to read the header of file '%s'", fn[i]);
            goto done;
        }
        rg2id_in = hash_rg(old);
        if (rg2id_in == NULL) {
            print_error_errno("cat", "fail to index read groups of file '%s'", fn[i]);
            goto done;
        }

        // Compute RG mapping if suitable for changing.
        if (rg2id_in->n_id == 1) {
            int _;
            new_rg = hash_s2i_inc(rg2id, rg2id_in->id[0], NULL, &_);
        } else {
            new_rg = 0;
        }

        hash_s2i_free(rg2id_in);


        // Copy containers and the blocks within them
        while ((c = cram_read_container(in_c))) {
            cram_block *blk;

            if (cram_container_is_empty(in_c)) {
                if (cram_write_container(out_c, c) != 0)
                    goto done;

                // Container compression header
                if (!(blk = cram_read_block(in_c)))
                    goto done;
                if (cram_write_block(out_c, blk) != 0) {
                    cram_free_block(blk);
                    goto done;
                }
                cram_free_block(blk);
                cram_free_container(c);
                c = NULL;

                continue;
            }

            // If we have just one RG key and new_rg != 0 then
            // we need to edit the compression header. IF WE CAN.
            if (new_rg) {
                int zero = 0;
                //fprintf(pysam_stderr, "Transcode RG %d to %d\n", 0, new_rg);
                if (cram_transcode_rg(in_c, out_c, c, 1, &zero, &new_rg) != 0) {
                    print_error_errno("cat", "fail to renumber read group in file '%s'", fn[i]);
                    goto done;
                }
            } else {
                int32_t num_slices;

                // Not switching rg so do the usual read/write loop
                if (cram_write_container(out_c, c) != 0)
                    goto done;

                // Container compression header
                if (!(blk = cram_read_block(in_c)))
                    goto done;
                if (cram_write_block(out_c, blk) != 0) {
                    cram_free_block(blk);
                    goto done;
                }
                cram_free_block(blk);


                // Container num_blocks can be invalid, due to a bug.
                // Instead we iterate in slice context instead.
                (void)cram_container_get_landmarks(c, &num_slices);
                if (cram_copy_slice(in_c, out_c, num_slices) != 0)
                    goto done;
            }

            cram_free_container(c);
            c = NULL;
        }

        bam_hdr_destroy(old);
        old = NULL;
        sam_close(in);
        in = NULL;
    }
    ret = 0;

 done:
    if (c) cram_free_container(c);
    if (old) bam_hdr_destroy(old);
    if (in) sam_close(in);
    // Closing the output flushes it, so a failure here is a write error.
    if (out && sam_close(out) != 0 && ret == 0) {
        print_error_errno("cat", "error closing output file '%s'", outcram);
        ret = -1;
    }

    if (rg2id) hash_s2i_free(rg2id);
    bam_hdr_destroy(new_h);

    return ret;
}
Example #16
0
/*
 * Entry point for `samtools mpileup`: fill an mplp_conf_t with defaults,
 * apply the command-line options, then run mpileup() over either the
 * files listed with -b or the remaining positional arguments.
 *
 * Returns the value of mpileup(), or 1 on a usage or option error.
 */
int bam_mpileup(int argc, char *argv[])
{
    int c;
    const char *file_list = NULL;
    char **fn = NULL;
    int nfiles = 0, use_orphan = 0;
    mplp_conf_t mplp;
    memset(&mplp, 0, sizeof(mplp_conf_t));
    mplp.min_baseQ = 13;
    mplp.capQ_thres = 0;
    mplp.max_depth = 250; mplp.max_indel_depth = 250;
    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
    mplp.min_frac = 0.002; mplp.min_support = 1;
    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
    mplp.argc = argc; mplp.argv = argv;
    mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
    mplp.output_fname = NULL;
    static const struct option lopts[] =
    {
        {"rf", required_argument, NULL, 1},   // require flag
        {"ff", required_argument, NULL, 2},   // filter flag
        {"incl-flags", required_argument, NULL, 1},
        {"excl-flags", required_argument, NULL, 2},
        {"output", required_argument, NULL, 3},
        {"open-prob", required_argument, NULL, 4},
        {"illumina1.3+", no_argument, NULL, '6'},
        {"count-orphans", no_argument, NULL, 'A'},
        {"bam-list", required_argument, NULL, 'b'},
        {"no-BAQ", no_argument, NULL, 'B'},
        {"no-baq", no_argument, NULL, 'B'},
        {"adjust-MQ", required_argument, NULL, 'C'},
        {"adjust-mq", required_argument, NULL, 'C'},
        {"max-depth", required_argument, NULL, 'd'},
        {"redo-BAQ", no_argument, NULL, 'E'},
        {"redo-baq", no_argument, NULL, 'E'},
        {"fasta-ref", required_argument, NULL, 'f'},
        {"exclude-RG", required_argument, NULL, 'G'},
        {"exclude-rg", required_argument, NULL, 'G'},
        {"positions", required_argument, NULL, 'l'},
        {"region", required_argument, NULL, 'r'},
        {"ignore-RG", no_argument, NULL, 'R'},
        {"ignore-rg", no_argument, NULL, 'R'},
        {"min-MQ", required_argument, NULL, 'q'},
        {"min-mq", required_argument, NULL, 'q'},
        {"min-BQ", required_argument, NULL, 'Q'},
        {"min-bq", required_argument, NULL, 'Q'},
        {"ignore-overlaps", no_argument, NULL, 'x'},
        {"BCF", no_argument, NULL, 'g'},
        {"bcf", no_argument, NULL, 'g'},
        {"VCF", no_argument, NULL, 'v'},
        {"vcf", no_argument, NULL, 'v'},
        {"output-BP", no_argument, NULL, 'O'},
        {"output-bp", no_argument, NULL, 'O'},
        {"output-MQ", no_argument, NULL, 's'},
        {"output-mq", no_argument, NULL, 's'},
        {"output-tags", required_argument, NULL, 't'},
        {"uncompressed", no_argument, NULL, 'u'},
        {"ext-prob", required_argument, NULL, 'e'},
        {"gap-frac", required_argument, NULL, 'F'},
        {"tandem-qual", required_argument, NULL, 'h'},
        {"skip-indels", no_argument, NULL, 'I'},
        {"max-idepth", required_argument, NULL, 'L'},
        {"min-ireads", required_argument, NULL, 'm'},
        {"per-sample-mF", no_argument, NULL, 'p'},
        {"per-sample-mf", no_argument, NULL, 'p'},
        {"platforms", required_argument, NULL, 'P'},
        {NULL, 0, NULL, 0}
    };
    while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxt:",lopts,NULL)) >= 0) {
        switch (c) {
        case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
        case  1 :
            mplp.rflag_require = bam_str2flag(optarg);
            if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; }
            break;
        case  2 :
            mplp.rflag_filter = bam_str2flag(optarg);
            if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; }
            break;
        case  3 : mplp.output_fname = optarg; break;
        case  4 : mplp.openQ = atoi(optarg); break;
        case 'f':
            mplp.fai = fai_load(optarg);
            if (mplp.fai == 0) return 1;
            mplp.fai_fname = optarg;
            break;
        case 'd': mplp.max_depth = atoi(optarg); break;
        case 'r': mplp.reg = strdup(optarg); break;
        case 'l':
                  // In the original version the whole BAM was streamed which is inefficient
                  //  with few BED intervals and big BAMs. Todo: devise a heuristic to determine
                  //  best strategy, that is streaming or jumping.
                  mplp.bed = bed_read(optarg);
                  // The first argument is the subcommand name; the format string comes second.
                  if (!mplp.bed) { print_error_errno("mpileup", "Could not read file \"%s\"", optarg); return 1; }
                  break;
        case 'P': mplp.pl_list = strdup(optarg); break;
        case 'p': mplp.flag |= MPLP_PER_SAMPLE; break;
        case 'g': mplp.flag |= MPLP_BCF; break;
        case 'v': mplp.flag |= MPLP_BCF | MPLP_VCF; break;
        case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_BCF; break;
        case 'B': mplp.flag &= ~MPLP_REALN; break;
        case 'D': mplp.fmt_flag |= B2B_FMT_DP; fprintf(stderr, "[warning] samtools mpileup option `-D` is functional, but deprecated. Please switch to `-t DP` in future.\n"); break;
        case 'S': mplp.fmt_flag |= B2B_FMT_SP; fprintf(stderr, "[warning] samtools mpileup option `-S` is functional, but deprecated. Please switch to `-t SP` in future.\n"); break;
        case 'V': mplp.fmt_flag |= B2B_FMT_DV; fprintf(stderr, "[warning] samtools mpileup option `-V` is functional, but deprecated. Please switch to `-t DV` in future.\n"); break;
        case 'I': mplp.flag |= MPLP_NO_INDEL; break;
        case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
        case '6': mplp.flag |= MPLP_ILLUMINA13; break;
        case 'R': mplp.flag |= MPLP_IGNORE_RG; break;
        case 's': mplp.flag |= MPLP_PRINT_MAPQ; break;
        case 'O': mplp.flag |= MPLP_PRINT_POS; break;
        case 'C': mplp.capQ_thres = atoi(optarg); break;
        case 'q': mplp.min_mq = atoi(optarg); break;
        case 'Q': mplp.min_baseQ = atoi(optarg); break;
        case 'b': file_list = optarg; break;
        case 'o': {
                char *end;
                long value = strtol(optarg, &end, 10);
                // Distinguish between -o INT and -o FILE (a bit of a hack!)
                if (*end == '\0') mplp.openQ = value;
                else mplp.output_fname = optarg;
            }
            break;
        case 'e': mplp.extQ = atoi(optarg); break;
        case 'h': mplp.tandemQ = atoi(optarg); break;
        case 'A': use_orphan = 1; break;
        case 'F': mplp.min_frac = atof(optarg); break;
        case 'm': mplp.min_support = atoi(optarg); break;
        case 'L': mplp.max_indel_depth = atoi(optarg); break;
        case 'G': {
                FILE *fp_rg;
                char buf[1024];
                // Reuse the hash when -G is repeated rather than leaking the old one.
                if (mplp.rghash == NULL) mplp.rghash = khash_str2int_init();
                if ((fp_rg = fopen(optarg, "r")) == NULL) {
                    fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg);
                } else {
                    // The field width keeps a long token from overrunning buf.
                    while (!feof(fp_rg) && fscanf(fp_rg, "%1023s", buf) > 0) {
                        char *rg = strdup(buf);
                        if (rg == NULL) break; // out of memory: stop reading
                        khash_str2int_inc(mplp.rghash, rg);
                    }
                    fclose(fp_rg);
                }
            }
            break;
        case 't': mplp.fmt_flag |= parse_format_flag(optarg); break;
        default:
            fprintf(stderr,"Invalid option: '%c'\n", c);
            return 1;
        }
    }
    if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
    {
        fprintf(stderr,"Error: The -B option cannot be combined with -E\n");
        return 1;
    }
    if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
    if (argc == 1)
    {
        print_usage(stderr, &mplp);
        return 1;
    }
    int ret;
    if (file_list) {
        if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
        ret = mpileup(&mplp,nfiles,fn);
        for (c=0; c<nfiles; c++) free(fn[c]);
        free(fn);
    }
    else
        ret = mpileup(&mplp, argc - optind, argv + optind);
    if (mplp.rghash) khash_str2int_destroy_free(mplp.rghash);
    free(mplp.reg); free(mplp.pl_list);
    if (mplp.fai) fai_destroy(mplp.fai);
    if (mplp.bed) bed_destroy(mplp.bed);
    return ret;
}
Example #17
0
/*
 * Entry point for the "depth" sub-command.
 *
 * Parses the command line, opens every input alignment file (taken either
 * from the remaining arguments or from the list file given with -f), and
 * prints one line per covered position to stdout: the reference name, the
 * 1-based position, and one depth column per input file.  Reads that are
 * deletions/reference skips at the position, or whose base quality is below
 * the -q threshold, are not counted.
 *
 * Returns EXIT_SUCCESS on success, non-zero on a usage or I/O error.
 * All resources acquired after option parsing are released through the
 * single depth_end cleanup path.
 */
int main_depth(int argc, char *argv[])
{
    int i, n, tid, beg, end, pos, *n_plp = NULL, baseQ = 0, mapQ = 0, min_len = 0, status = EXIT_SUCCESS, nfiles;
    int ret;
    const bam_pileup1_t **plp = NULL;
    char *reg = 0; // specified region
    void *bed = 0; // BED data structure
    char *file_list = NULL, **fn = NULL;
    bam_hdr_t *h = NULL; // BAM header of the 1st input
    aux_t **data = NULL;
    bam_mplp_t mplp;

    // parse the command line
    while ((n = getopt(argc, argv, "r:b:q:Q:l:f:")) >= 0) {
        switch (n) {
            case 'l': min_len = atoi(optarg); break; // minimum query length
            case 'r':
                free(reg);                // a repeated -r must not leak the earlier copy
                reg = strdup(optarg);     // parsing a region requires a BAM header
                break;
            case 'b':
                if (bed) bed_destroy(bed); // a repeated -b must not leak the earlier table
                bed = bed_read(optarg); // BED or position list file can be parsed now
                if (!bed) {
                    print_error_errno("Could not read file \"%s\"", optarg);
                    free(reg);
                    return 1;
                }
                break;
            case 'q': baseQ = atoi(optarg); break;   // base quality threshold
            case 'Q': mapQ = atoi(optarg); break;    // mapping quality threshold
            case 'f': file_list = optarg; break;
        }
    }
    if (optind == argc && !file_list) {
        fprintf(pysamerr, "\n");
        fprintf(pysamerr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n");
        fprintf(pysamerr, "Options:\n");
        fprintf(pysamerr, "   -b <bed>            list of positions or regions\n");
        fprintf(pysamerr, "   -f <list>           list of input BAM filenames, one per line [null]\n");
        fprintf(pysamerr, "   -l <int>            read length threshold (ignore reads shorter than <int>)\n");
        fprintf(pysamerr, "   -q <int>            base quality threshold\n");
        fprintf(pysamerr, "   -Q <int>            mapping quality threshold\n");
        fprintf(pysamerr, "   -r <chr:from-to>    region\n");
        fprintf(pysamerr, "\n");
        free(reg);
        if (bed) bed_destroy(bed);
        return 1;
    }

    // initialize the auxiliary data structures
    if (file_list)
    {
        if ( read_file_list(file_list,&nfiles,&fn) ) {
            // fn/nfiles are not touched here: their state after a failed
            // read_file_list() is not visible from this function.
            free(reg);
            if (bed) bed_destroy(bed);
            return 1;
        }
        n = nfiles;
        argv = fn;
        optind = 0;
    }
    else
        n = argc - optind; // the number of BAMs on the command line

    if (n <= 0) { // nothing to read; data[0] below would be out of bounds
        print_error("no input files given");
        status = EXIT_FAILURE;
        goto depth_end;
    }

    data = calloc(n, sizeof(aux_t*)); // data[i] for the i-th input
    if (data == NULL) {
        print_error_errno("Could not allocate memory for %d input file(s)", n);
        status = EXIT_FAILURE;
        goto depth_end;
    }
    beg = 0; end = 1<<30;  // set the default region
    for (i = 0; i < n; ++i) {
        data[i] = calloc(1, sizeof(aux_t));
        if (data[i] == NULL) {
            print_error_errno("Could not allocate memory for \"%s\"", argv[optind+i]);
            status = EXIT_FAILURE;
            goto depth_end;
        }
        data[i]->fp = sam_open(argv[optind+i], "r"); // open BAM
        if (data[i]->fp == NULL) {
            print_error_errno("Could not open \"%s\"", argv[optind+i]);
            status = EXIT_FAILURE;
            goto depth_end;
        }
        if (hts_set_opt(data[i]->fp, CRAM_OPT_REQUIRED_FIELDS,
                        SAM_FLAG | SAM_RNAME | SAM_POS | SAM_MAPQ | SAM_CIGAR |
                        SAM_SEQ)) {
            fprintf(pysamerr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
            status = EXIT_FAILURE;
            goto depth_end; // release the files opened so far instead of returning directly
        }
        if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
            fprintf(pysamerr, "Failed to set CRAM_OPT_DECODE_MD value\n");
            status = EXIT_FAILURE;
            goto depth_end;
        }
        data[i]->min_mapQ = mapQ;                    // set the mapQ filter
        data[i]->min_len  = min_len;                 // set the qlen filter
        data[i]->hdr = sam_hdr_read(data[i]->fp);    // read the BAM header
        if (data[i]->hdr == NULL) { // the header is dereferenced below, so it must exist
            print_error("Could not read header for \"%s\"", argv[optind+i]);
            status = EXIT_FAILURE;
            goto depth_end;
        }
        if (reg) { // if a region is specified
            hts_idx_t *idx = sam_index_load(data[i]->fp, argv[optind+i]);  // load the index
            if (idx == NULL) {
                print_error("can't load index for \"%s\"", argv[optind+i]);
                status = EXIT_FAILURE;
                goto depth_end;
            }
            data[i]->iter = sam_itr_querys(idx, data[i]->hdr, reg); // set the iterator
            hts_idx_destroy(idx); // the index is not needed any more; free the memory
            if (data[i]->iter == NULL) {
                print_error("can't parse region \"%s\"", reg);
                status = EXIT_FAILURE;
                goto depth_end;
            }
        }
    }

    h = data[0]->hdr; // easy access to the header of the 1st BAM
    if (reg) {
        beg = data[0]->iter->beg; // and to the parsed region coordinates
        end = data[0]->iter->end;
    }

    // the core multi-pileup loop
    mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization
    n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM
    plp = calloc(n, sizeof(bam_pileup1_t*)); // plp[i] points to the array of covering reads (internal in mplp)
    if (mplp == NULL || n_plp == NULL || plp == NULL) {
        print_error_errno("Could not set up the pileup for %d input file(s)", n);
        free(n_plp); free(plp);
        if (mplp) bam_mplp_destroy(mplp);
        status = EXIT_FAILURE;
        goto depth_end;
    }
    while ((ret = bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // come to the next covered position
        if (pos < beg || pos >= end) continue; // out of range; skip
        if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip
        fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster
        for (i = 0; i < n; ++i) { // base level filters have to go here
            int j, m = 0;
            for (j = 0; j < n_plp[i]; ++j) {
                const bam_pileup1_t *p = plp[i] + j; // DON'T modify plp[][] unless you really know
                if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos
                else if (bam_get_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality
            }
            printf("\t%d", n_plp[i] - m); // this is the depth to output
        }
        putchar('\n');
    }
    if (ret < 0) { // a negative value signals a read error, not end of input
        print_error("error while reading the input file(s)");
        status = EXIT_FAILURE;
    }
    free(n_plp); free(plp);
    bam_mplp_destroy(mplp);

depth_end:
    if (data) {
        // Entries are filled in order, so the first NULL marks where setup stopped.
        for (i = 0; i < n && data[i]; ++i) {
            bam_hdr_destroy(data[i]->hdr);
            if (data[i]->fp) sam_close(data[i]->fp);
            hts_itr_destroy(data[i]->iter);
            free(data[i]);
        }
        free(data);
    }
    free(reg);
    if (bed) bed_destroy(bed);
    if ( file_list )
    {
        for (i=0; i<n; i++) free(fn[i]);
        free(fn);
    }
    return status;
}
Example #18
0
/*
 * Entry point for the "flagstat" sub-command.
 *
 * Expects exactly one input file after the options.  Opens it, reads the
 * header, gathers the per-flag counters with bam_flagstat_core() and prints
 * them to samtools_stdout as "<QC-passed> + <QC-failed> <description>" lines.
 *
 * Returns 0 on success and 1 on any failure.  Every exit after the input
 * has been opened goes through the cleanup label so that the file handle,
 * header, counters and global-option state are always released.
 */
int bam_flagstat(int argc, char *argv[])
{
    samFile *fp = NULL;
    bam_hdr_t *header = NULL;
    bam_flagstat_t *s = NULL;
    char b0[16], b1[16]; // scratch buffers for the two percentages printed on one line
    int c;
    int status = 1;      // pessimistic default; cleared only after all output is written

    enum {
        INPUT_FMT_OPTION = CHAR_MAX+1,
    };

    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', '-', '@'),
        {NULL, 0, NULL, 0}
    };

    while ((c = getopt_long(argc, argv, "@:", lopts, NULL)) >= 0) {
        switch (c) {
        default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
            /* else fall-through */
        case '?':
            usage_exit(samtools_stderr, EXIT_FAILURE);
        }
    }

    if (argc != optind+1) {
        // No file at all is treated as a request for help; too many is an error.
        if (argc == optind) usage_exit(samtools_stdout, EXIT_SUCCESS);
        else usage_exit(samtools_stderr, EXIT_FAILURE);
    }
    fp = sam_open_format(argv[optind], "r", &ga.in);
    if (fp == NULL) {
        print_error_errno("flagstat", "Cannot open input file \"%s\"", argv[optind]);
        goto cleanup;
    }
    if (ga.nthreads > 0)
        hts_set_threads(fp, ga.nthreads);

    if (hts_set_opt(fp, CRAM_OPT_REQUIRED_FIELDS,
                    SAM_FLAG | SAM_MAPQ | SAM_RNEXT)) {
        fprintf(samtools_stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
        goto cleanup;
    }

    if (hts_set_opt(fp, CRAM_OPT_DECODE_MD, 0)) {
        fprintf(samtools_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
        goto cleanup;
    }

    header = sam_hdr_read(fp);
    if (header == NULL) {
        fprintf(samtools_stderr, "Failed to read header for \"%s\"\n", argv[optind]);
        goto cleanup;
    }
    s = bam_flagstat_core(fp, header);
    if (s == NULL) {
        // NOTE(review): defensive guard; whether bam_flagstat_core() can
        // return NULL is not visible from here.
        fprintf(samtools_stderr, "Failed to collect statistics for \"%s\"\n", argv[optind]);
        goto cleanup;
    }
    fprintf(samtools_stdout, "%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
    fprintf(samtools_stdout, "%lld + %lld secondary\n", s->n_secondary[0], s->n_secondary[1]);
    fprintf(samtools_stdout, "%lld + %lld supplementary\n", s->n_supp[0], s->n_supp[1]);
    fprintf(samtools_stdout, "%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]);
    fprintf(samtools_stdout, "%lld + %lld mapped (%s : %s)\n", s->n_mapped[0], s->n_mapped[1], percent(b0, s->n_mapped[0], s->n_reads[0]), percent(b1, s->n_mapped[1], s->n_reads[1]));
    fprintf(samtools_stdout, "%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
    fprintf(samtools_stdout, "%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]);
    fprintf(samtools_stdout, "%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]);
    fprintf(samtools_stdout, "%lld + %lld properly paired (%s : %s)\n", s->n_pair_good[0], s->n_pair_good[1], percent(b0, s->n_pair_good[0], s->n_pair_all[0]), percent(b1, s->n_pair_good[1], s->n_pair_all[1]));
    fprintf(samtools_stdout, "%lld + %lld with itself and mate mapped\n", s->n_pair_map[0], s->n_pair_map[1]);
    fprintf(samtools_stdout, "%lld + %lld singletons (%s : %s)\n", s->n_sgltn[0], s->n_sgltn[1], percent(b0, s->n_sgltn[0], s->n_pair_all[0]), percent(b1, s->n_sgltn[1], s->n_pair_all[1]));
    fprintf(samtools_stdout, "%lld + %lld with mate mapped to a different chr\n", s->n_diffchr[0], s->n_diffchr[1]);
    fprintf(samtools_stdout, "%lld + %lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh[0], s->n_diffhigh[1]);
    status = 0;

cleanup:
    free(s);
    if (header) bam_hdr_destroy(header);
    if (fp) sam_close(fp);
    sam_global_args_free(&ga);
    return status;
}
Example #19
0
/*
 * Entry point for the "fixmate" sub-command.
 *
 * Usage shape: [-r] [-p] [-c] [-m] [global options] <input> <output>.
 * Opens both files, optionally attaches a shared thread pool to them, and
 * hands the actual work to bam_mating_core().
 *
 * Returns 0 when called without any argument (usage goes to stdout),
 * otherwise the result of bam_mating_core(), or 1 on any set-up failure or
 * when closing the output file fails.
 */
int bam_mating(int argc, char *argv[])
{
    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
        { NULL, 0, NULL, 0 }
    };
    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
    htsThreadPool pool = {NULL, 0};
    samFile *in_fp = NULL;
    samFile *out_fp = NULL;
    char out_mode[3] = {'w', 'b', 0};
    int drop_reads = 0;     /* -r */
    int check_proper = 1;   /* cleared by -p */
    int want_ct = 0;        /* -c */
    int want_ms = 0;        /* -m */
    int status = 1;
    int opt;

    /* A bare invocation is a request for help, not an error. */
    if (argc == 1) {
        usage(stdout);
        return 0;
    }

    /* Option parsing: local flags first, everything else goes to the
     * shared global-option parser. */
    for (;;) {
        opt = getopt_long(argc, argv, "rpcmO:@:", lopts, NULL);
        if (opt < 0)
            break;

        if (opt == 'r') {
            drop_reads = 1;
        } else if (opt == 'p') {
            check_proper = 0;
        } else if (opt == 'c') {
            want_ct = 1;
        } else if (opt == 'm') {
            want_ms = 1;
        } else if (opt == '?' || parse_sam_global_opt(opt, optarg, lopts, &ga) != 0) {
            usage(stderr);
            goto fail;
        }
    }

    /* Both an input and an output name are required. */
    if (argc <= optind + 1) {
        usage(stderr);
        goto fail;
    }

    /* Open the input. */
    in_fp = sam_open_format(argv[optind], "rb", &ga.in);
    if (in_fp == NULL) {
        print_error_errno("fixmate", "cannot open input file");
        goto fail;
    }

    /* Let the output file name adjust the mode string, then open the output. */
    sam_open_mode(out_mode + 1, argv[optind + 1], NULL);
    out_fp = sam_open_format(argv[optind + 1], out_mode, &ga.out);
    if (out_fp == NULL) {
        print_error_errno("fixmate", "cannot open output file");
        goto fail;
    }

    /* One pool shared by reader and writer when threads were requested. */
    if (ga.nthreads > 0) {
        pool.pool = hts_tpool_init(ga.nthreads);
        if (pool.pool == NULL) {
            fprintf(stderr, "Error creating thread pool\n");
            goto fail;
        }
        hts_set_opt(in_fp,  HTS_OPT_THREAD_POOL, &pool);
        hts_set_opt(out_fp, HTS_OPT_THREAD_POOL, &pool);
    }

    /* Do the work. */
    status = bam_mating_core(in_fp, out_fp, drop_reads, check_proper, want_ct, want_ms);

    /* Normal tear-down: a failed close of the output turns success into failure. */
    sam_close(in_fp);
    if (sam_close(out_fp) < 0) {
        fprintf(stderr, "[bam_mating] error while closing output file\n");
        status = 1;
    }
    if (pool.pool != NULL)
        hts_tpool_destroy(pool.pool);
    sam_global_args_free(&ga);
    return status;

 fail:
    /* Error tear-down: release whatever was acquired before the failure. */
    if (in_fp != NULL)
        sam_close(in_fp);
    if (out_fp != NULL)
        sam_close(out_fp);
    if (pool.pool != NULL)
        hts_tpool_destroy(pool.pool);
    sam_global_args_free(&ga);
    return 1;
}