Пример #1
0
uint calculate_cov_params(const char* const bam_name,
                          const int32_t tid,
                          const int32_t start,
                          const int32_t stop)
{
    bamFile fp = bam_open(bam_name, "r");
    bam_index_t* fp_index = bam_index_load(bam_name);

    bam_plbuf_t *buf;

    covdata* cvdt = ckallocz(sizeof(covdata));
    cvdt->tid = tid;
    cvdt->begin = start;
    cvdt->end   = stop;
    cvdt->coverage = ckallocz((cvdt->end - cvdt->begin) * sizeof(uint32_t));
    
    buf = bam_plbuf_init(pileup_func, cvdt);
    bam_fetch(fp, fp_index, tid, start, stop, buf, fetch_func);
    bam_plbuf_push(0, buf);
    bam_plbuf_destroy(buf);  

    // calculate the mean coverage in the region of the putative deletion
    uint i, covsum;
    for(i = 0, covsum = 0; i < (cvdt->end - cvdt->begin); i++){
        covsum += cvdt->coverage[i];
    }
  
    uint avgcov = floor(covsum * 1.0/(cvdt->end - cvdt->begin));
    ckfree(cvdt->coverage);
    ckfree(cvdt);

    bam_close(fp);   
    bam_index_destroy(fp_index);
    return avgcov;
}
Пример #2
0
int main(int argc, char *argv[])
{
     char *progname;

     char *bamfilename;
     int32_t tid;

     samfile_t *bamin;
     bam_index_t *bamidx;
     bam_plbuf_t *buf;
     bam1_t *bam_read;
     uint32_t next_pos = 1;

     progname = *argv;
     argv++; argc--;
     if (argc < 2) {
          printf("Usage: %s bam_file tid\n", progname);
          exit(1);
     }
     else {
          bamfilename = argv[0];
          tid = strtol(argv[1], NULL, 10);
     }

     /* try to open bam file */
     bamin = samopen(bamfilename, "rb", NULL);
     if (!bamin) {
          fprintf(stderr, "Error opening bamfile %s\n", bamfilename);
          exit(1);
     }
     /* try to open index */
     bamidx = bam_index_load(bamfilename);
     if (!bamidx) {
          fprintf(stderr, "Error opening index for %s\n", bamfilename);
          exit(1);
     }
     bam_read = bam_init1();

     buf = bam_plbuf_init(&pileup_func, &next_pos);
     /* disable maximum pileup depth */
     bam_plp_set_maxcnt(buf->iter, INT_MAX);
     bam_fetch(bamin->x.bam, bamidx,
               tid, 0, INT_MAX,
               buf, &fetch_func);
     bam_plbuf_push(0, buf);    /* finish pileup */

     bam_plbuf_destroy(buf);
     bam_destroy1(bam_read);
     bam_index_destroy(bamidx);
     samclose(bamin);
     return 0;
}
Пример #3
0
int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data)
{
	bam_plbuf_t *buf;
	int ret;
	bam1_t *b;
	b = bam_init1();
	buf = bam_plbuf_init(func, func_data);
	bam_plbuf_set_mask(buf, mask);
	while ((ret = bam_read1(fp, b)) >= 0)
		bam_plbuf_push(b, buf);
	bam_plbuf_push(0, buf);
	bam_plbuf_destroy(buf);
	bam_destroy1(b);
	return 0;
}
Пример #4
0
void bam_lplbuf_destroy(bam_lplbuf_t *tv)
{
	freenode_t *p, *q;
	free(tv->cur_level); free(tv->pre_level);
	bam_plbuf_destroy(tv->plbuf);
	free(tv->aux);
	for (p = tv->head; p->next;) {
		q = p->next;
		mp_free(tv->mp, p); p = q;
	}
	mp_free(tv->mp, p);
	assert(tv->mp->cnt == 0);
	mp_destroy(tv->mp);
	free(tv);
}
Пример #5
0
int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
{
    bam_plbuf_t *buf;
    int ret;
    bam1_t *b;
    b = bam_init1();
    buf = bam_plbuf_init(func, func_data);
    if (mask < 0) mask = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
    else mask |= BAM_FUNMAP;
    while ((ret = samread(fp, b)) >= 0) {
        // bam_plp_push() itself now filters out unmapped reads only
        if (b->core.flag & mask) b->core.flag |= BAM_FUNMAP;
        bam_plbuf_push(b, buf);
    }
    bam_plbuf_push(0, buf);
    bam_plbuf_destroy(buf);
    bam_destroy1(b);
    return 0;
}
Пример #6
0
int main(int argc, char *argv[])  
{  
	tmpstruct_t tmp;  
	if (argc == 1) {  
		fprintf(stderr, "Usage: calDepth <in.bam> [region]\n");  
		return 1;  
	}  
	tmp.beg = 0; tmp.end = 0x7fffffff;  
	tmp.in = samopen(argv[1], "rb", 0);  
	if (tmp.in == 0) {  
		fprintf(stderr, "Fail to open BAM file %s\n", argv[1]);  
		return 1;  
	}  
	if (argc == 2) { // if a region is not specified  
		sampileup(tmp.in, -1, pileup_func, &tmp);  
	} else {  
		int ref;  
		bam_index_t *idx;  
		bam_plbuf_t *buf;  
		idx = bam_index_load(argv[1]); // load BAM index  
		if (idx == 0) {  
			fprintf(stderr, "BAM indexing file is not available.\n");  
			return 1;  
		}  
		bam_parse_region(tmp.in->header, argv[2], &ref,  
										 &tmp.beg, &tmp.end); // parse the region  
		if (ref < 0) {  
			fprintf(stderr, "Invalid region %s\n", argv[2]);  
			return 1;  
		}  
		buf = bam_plbuf_init(pileup_func, &tmp); // initialize pileup  
		bam_fetch(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, buf, fetch_func);  
		bam_plbuf_push(0, buf); // finalize pileup  
		bam_index_destroy(idx);  
		bam_plbuf_destroy(buf);  
	}  
	samclose(tmp.in);  
	return 0;

}
Пример #7
0
void mapper( char *ref, int length, int start_base_pos, const char *bam )
{  
    anal_t input;  
    gzFile pRef;
    kseq_t * seq = NULL;
    char chr[8] = { 0, };
    int ret;
    bam_plbuf_t *buf;
    bam1_t *b;

/*
    fprintf( stderr, "ref: %s\n", ref );
    fprintf( stderr, "length: %d\n", length );
    fprintf( stderr, "start_base_pos: %d\n", start_base_pos );
    fprintf( stderr, "bam: %s\n", bam );
*/
    input.beg = 0; input.end = 0x7fffffff;  
    input.in = samopen(bam, "rb", 0);
  
    if (input.in == 0) 
    {  
        fprintf(stderr, "Fail to open BAM file %s\n", bam);  
        return;  
    }  

    pRef = gzopen( ref, "r" );

        fprintf( stderr, "ref : %s\n", ref );
        fprintf( stderr, "pRef: %p\n", pRef );
    if( pRef == NULL )
    {
        fprintf( stderr, "ref : %s\n", ref );
        fprintf( stderr, "pRef: %p\n", pRef );

        return;
    }

    seq = kseq_init( pRef );

    b = bam_init1(); // alloc memory size of bam1_t
//fprintf( stderr, "%\pn", b );
    buf = bam_plbuf_init(pileup_func, &input); // alloc memory

    bam_plbuf_set_mask(buf, -1);
    
    while ((ret = samread( input.in, b)) >= 0)
    {   
        bam_plbuf_push(b, buf); 
        
//fprintf( stderr, "%x\n", b->core.flag );
        if( b->core.flag & 0x0004 ) // unmapped
        {    // do nothing
/*
            qname1 = strtok(bam1_qname(b), ":\t\n ");
            qname2 = strtok(NULL, ":\t\n ");
            qname3 = atoi(qname2);

            fprintf( stderr, "%s:%10d:%s:%d\t%c:%d:%d:%d\n", 
                qname1, qname3, "*", b->core.pos,
                '*', b->core.flag, b->core.qual, ret );
*/
            fprintf( stdout, "%s:%s:%d\t%c:0x%x:%d:%d\n", 
                bam1_qname(b), "*", b->core.pos+1,
                '*', b->core.flag, b->core.qual, ret );
/*
            fprintf( stderr, "%s:%s:%d\t%c:0x%x:%d:%d\n", 
                bam1_qname(b), "*", b->core.pos,
                '*', b->core.flag, b->core.qual, ret );
*/
        }
        else
        {
            // to find a base in the reference genome, seq.

            if( ( seq != NULL ) &&  
                ( strcmp( input.in->header->target_name[b->core.tid], chr ) == 0 ) )
            {
                // already found that 
                // fprintf( stderr, "found : %s\n", chr );
            }else
            {
                if( find_chr(input.in->header->target_name[b->core.tid], seq, chr) < 0 )
                {
                     fprintf( stderr, "ERROR : cannot find chromosome %s\n", \
                             input.in->header->target_name[b->core.tid] );
                }else
                {
                     fprintf( stderr, "FOUND CHR : %s\n", chr );
                }          
            }
            // remove not aligned to the chromosome

            fprintf( stdout, "%s:%s:%d\t%c:%d:%d:%d\n", 
                bam1_qname(b),
                input.in->header->target_name[b->core.tid], 
                b->core.pos+1,
                seq->seq.s[b->core.pos], b->core.flag, b->core.qual, ret );

/*
            fprintf( stderr, "%s:%s:%d\t%c:%d:%d:%d\n", 
                bam1_qname(b),
                input.in->header->target_name[b->core.tid], 
                b->core.pos,
                seq->seq.s[b->core.pos], b->core.flag, b->core.qual, ret );
*/
        }
    }

    // for the last bases...
  
//    printf("pos:%d(%c), flag:%d qual: %d(ret %d)\n", 
//           b->core.pos+1, seq->seq.s[b->core.pos], b->core.flag, b->core.qual, ret );

    bam_plbuf_push(0, buf); 

    bam_plbuf_destroy(buf); // release memory
    bam_destroy1(b);  // release memory size of bam1_t
     
    samclose(input.in);  
 
    kseq_destroy( seq );
    gzclose( pRef );

    return;  
}  
Пример #8
0
int main_ld(int argc, char *argv[])
{
    int chr;                  //! chromosome identifier
    int beg;                  //! beginning coordinate for analysis
    int end;                  //! end coordinate for analysis
    int ref;                  //! ref
    long num_windows;         //! number of windows
    std::string msg;          //! string for error message
    bam_plbuf_t *buf;         //! pileup buffer
    ldData t;

    // parse the command line options
    std::string region = t.parseCommandLine(argc, argv);

    // check input BAM file for errors
    t.checkBAM();

    // initialize the sample data structure
    t.bam_smpl_init();

    // add samples
    t.bam_smpl_add();

    // initialize error model
    t.em = errmod_init(1.0-0.83);

    // parse genomic region
    int k = bam_parse_region(t.h, region, &chr, &beg, &end);
    if (k < 0)
    {
        msg = "Bad genome coordinates: " + region;
        fatal_error(msg, __FILE__, __LINE__, 0);
    }

    // fetch reference sequence
    t.ref_base = faidx_fetch_seq(t.fai_file, t.h->target_name[chr], 0, 0x7fffffff, &(t.len));

    // calculate the number of windows
    if (t.flag & BAM_WINDOW)
        num_windows = ((end-beg)-1)/t.win_size;
    else
    {
        t.win_size = (end-beg);
        num_windows = 1;
    }

    // iterate through all windows along specified genomic region
    for (long cw=0; cw < num_windows; cw++)
    {

        // construct genome coordinate string
        std::string scaffold_name(t.h->target_name[chr]);
        std::ostringstream winc(scaffold_name);
        winc.seekp(0, std::ios::end);
        winc << ":" << beg+(cw*t.win_size)+1 << "-" << ((cw+1)*t.win_size)+(beg-1);
        std::string winCoord = winc.str();

        // initialize number of sites to zero
        t.num_sites = 0;

        // parse the BAM file and check if region is retrieved from the reference
        if (t.flag & BAM_WINDOW)
        {
            k = bam_parse_region(t.h, winCoord, &ref, &(t.beg), &(t.end));
            if (k < 0)
            {
                msg = "Bad window coordinates " + winCoord;
                fatal_error(msg, __FILE__, __LINE__, 0);
            }
        }
        else
        {
            ref = chr;
            t.beg = beg;
            t.end = end;
            if (ref < 0)
            {
                msg = "Bad scaffold name: " + region;
                fatal_error(msg, __FILE__, __LINE__, 0);
            }
        }

        // initialize nucdiv variables
        t.init_ld();

        // create population assignments
        t.assign_pops();

        // initialize pileup
        buf = bam_plbuf_init(make_ld, &t);

        // fetch region from bam file
        if ((bam_fetch(t.bam_in->x.bam, t.idx, ref, t.beg, t.end, buf, fetch_func)) < 0)
        {
            msg = "Failed to retrieve region " + region + " due to corrupted BAM index file";
            fatal_error(msg, __FILE__, __LINE__, 0);
        }

        // finalize pileup
        bam_plbuf_push(0, buf);

        // calculate linkage disequilibrium statistics
        ld_func fp[3] = {&ldData::calc_zns, &ldData::calc_omegamax, &ldData::calc_wall};
        (t.*fp[t.output])();

        // print results to stdout
        t.print_ld(chr);

        // take out the garbage
        t.destroy_ld();
        bam_plbuf_destroy(buf);
    }
    // end of window interation

    errmod_destroy(t.em);
    samclose(t.bam_in);
    bam_index_destroy(t.idx);
    t.bam_smpl_destroy();
    free(t.ref_base);

    return 0;
}