//this function will print a samfile from the bamfile int motherView(bufReader *rd,int nFiles,std::vector<regs>regions) { aRead b; b.vDat=new uint8_t[RLEN]; kstring_t str; str.s=NULL; str.l=str.m=0; if(regions.size()==0) {//print all int block_size; while(SIG_COND && bgzf_read(rd[0].fp,&block_size,sizeof(int))){ getAlign(rd[0].fp,block_size,b); printReadBuffered(b,rd[0].hd,str); fprintf(stdout,"%s",str.s); } }else { for(int i=0;i<(int)regions.size();i++){ int tmpRef = regions[i].refID; int tmpStart = regions[i].start; int tmpStop = regions[i].stop; getOffsets(rd[0].bamfname,rd[0].hd,rd[0].it,tmpRef,tmpStart,tmpStop); int ret =0; while(SIG_COND){ ret = bam_iter_read(rd[0].fp, &rd[0].it, b); if(ret<0) break; printReadBuffered(b,rd[0].hd,str); fprintf(stdout,"%s",str.s); } free(rd[0].it.off);//the offsets } free(str.s); delete [] b.vDat; } return 0; }
// This function reads a BAM alignment from one BAM file. static int read_bam(void *data, bam1_t *b) // read level filters better go here to avoid pileup { aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary structure int ret = aux->iter? bam_iter_read(aux->fp, aux->iter, b) : bam_read1(aux->fp, b); if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= BAM_FUNMAP; return ret; }
/*
 * Read callback: fetch the next BAM alignment through the iterator.
 *
 * data - really an aux_t*; supplies the file handle, the iterator and the
 *        minimum mapping quality
 * b    - record to fill
 *
 * A record whose mapping quality is below min_mapQ gets BAM_FUNMAP set in
 * its flag. Returns the status reported by bam_iter_read().
 */
static int read_bam(void *data, bam1_t *b)
{
	aux_t *ctx = (aux_t*)data;
	const int status = bam_iter_read(ctx->fp, ctx->iter, b);

	if (ctx->min_mapQ > (int)b->core.qual) {
		b->core.flag |= BAM_FUNMAP;
	}
	return status;
}
// This function reads a BAM alignment from one BAM file. static int read_bam(void *data, bam1_t *b) // read level filters better go here to avoid pileup { aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary structure int ret = aux->iter? bam_iter_read(aux->fp, aux->iter, b) : bam_read1(aux->fp, b); if (!(b->core.flag&BAM_FUNMAP)) { if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= BAM_FUNMAP; else if (aux->min_len && bam_cigar2qlen(&b->core, bam1_cigar(b)) < aux->min_len) b->core.flag |= BAM_FUNMAP; } return ret; }
/*
 * Populate the read table T from an indexed BAM file.
 *
 *   T        - table to create and fill; T->seq_names receives a strdup'd
 *              copy of every target name in the BAM header
 *   reads_fn - path of the BAM file (its index is loaded with
 *              bam_index_load on the same path)
 *   is       - intervals to scan
 *
 * For every interval whose sequence name resolves to a target id, all reads
 * returned by the index query are visited, and those whose strand equals the
 * interval's strand are passed to table_inc(). Intervals with an unknown
 * sequence name are skipped. Open failures are reported through failf().
 */
void hash_reads( table* T, const char* reads_fn, interval_stack* is )
{
    samfile_t* bam_in = samopen( reads_fn, "rb", NULL );
    if( bam_in == NULL ) {
        failf( "Can't open bam file '%s'.", reads_fn );
    }

    bam_index_t* bam_idx = bam_index_load( reads_fn );
    if( bam_idx == NULL ) {
        failf( "Can't open bam index '%s.bai'.", reads_fn );
    }

    bam_init_header_hash( bam_in->header );

    table_create( T, bam_in->header->n_targets );

    /* Keep a private copy of the target names alongside the table. */
    const size_t n_seqs = bam_in->header->n_targets;
    T->seq_names = (char**)malloc( sizeof(char*) * n_seqs );
    size_t seq = 0;
    while( seq < n_seqs ) {
        T->seq_names[seq] = strdup(bam_in->header->target_name[seq]);
        ++seq;
    }

    log_puts( LOG_MSG, "hashing reads ... \n" );
    log_indent();

    bam1_t* rec = bam_init1();

    for( interval_stack::iterator iv = is->begin(); iv != is->end(); ++iv ) {
        const int tid = bam_get_tid( bam_in->header, iv->seqname );
        if( tid >= 0 ) {
            bam_iter_t it = bam_iter_query( bam_idx, tid, iv->start, iv->end );

            for( ;; ) {
                if( bam_iter_read( bam_in->x.bam, it, rec ) < 0 ) break;
                if( bam1_strand(rec) == iv->strand ) {
                    table_inc( T, rec );
                }
            }

            bam_iter_destroy(it);
        }
    }

    bam_destroy1(rec);

    log_unindent();
    log_printf( LOG_MSG, "done. (%zu unique reads hashed)\n", T->m );

    bam_index_destroy(bam_idx);
    samclose(bam_in);
}
/*
 * Read callback for mpileup: return the next alignment that survives all
 * read-level filters.
 *
 * data - really an mplp_aux_t*; supplies the file handle, optional iterator,
 *        header, reference and the configuration
 * b    - record to fill
 *
 * Records are read (via the iterator when there is one, otherwise
 * sequentially) until one passes, or until the reader returns a negative
 * status. A record is dropped when:
 *   - it has no target id or is flagged unmapped;
 *   - a BED is configured and the record does not overlap it;
 *   - a read-group hash is configured and the record's RG tag is found in it;
 *   - bam_cap_mapQ() returns a negative value (only consulted when the
 *     reference for this target is loaded and capQ_thres > 10);
 *   - otherwise, its mapping quality is below min_mq, or MPLP_NO_ORPHAN is
 *     set and the record has flag bit 1 set but not bit 2.
 * Side effects on a surviving (or late-rejected) record: qualities are
 * shifted down by 31 (floored at 0) under MPLP_ILLUMINA13,
 * bam_prob_realn_core() is applied under MPLP_REALN when the reference is
 * available, and the mapping quality may be lowered to the capped value.
 *
 * Returns the status of the last read.
 */
static int mplp_func(void *data, bam1_t *b)
{
	extern int bam_realn(bam1_t *b, const char *ref);
	extern int bam_prob_realn_core(bam1_t *b, const char *ref, int);
	extern int bam_cap_mapQ(bam1_t *b, char *ref, int thres);
	mplp_aux_t *ma = (mplp_aux_t*)data;
	int ret;

	for (;;) {
		int has_ref;
		int reject = 0;

		if (ma->iter)
			ret = bam_iter_read(ma->fp, ma->iter, b);
		else
			ret = bam_read1(ma->fp, b);
		if (ret < 0) break;

		/* exclude unmapped reads */
		if (b->core.tid < 0 || (b->core.flag&BAM_FUNMAP))
			continue;

		/* test overlap with the BED regions */
		if (ma->conf->bed &&
			!bed_overlap(ma->conf->bed, ma->h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b))))
			continue;

		/* exclude read groups */
		if (ma->conf->rghash) {
			uint8_t *rg = bam_aux_get(b, "RG");
			if (rg && bcf_str2id(ma->conf->rghash, (const char*)(rg+1)) >= 0)
				continue;
		}

		if (ma->conf->flag & MPLP_ILLUMINA13) {
			int k;
			uint8_t *qual = bam1_qual(b);
			for (k = 0; k < b->core.l_qseq; ++k)
				qual[k] = qual[k] > 31? qual[k] - 31 : 0;
		}

		has_ref = (ma->ref && ma->ref_id == b->core.tid)? 1 : 0;

		if (has_ref && (ma->conf->flag&MPLP_REALN))
			bam_prob_realn_core(b, ma->ref, (ma->conf->flag & MPLP_EXT_BAQ)? 3 : 1);

		if (has_ref && ma->conf->capQ_thres > 10) {
			int q = bam_cap_mapQ(b, ma->ref, ma->conf->capQ_thres);
			if (q < 0) reject = 1;
			else if (b->core.qual > q) b->core.qual = q;
		}
		else if (b->core.qual < ma->conf->min_mq) reject = 1;
		else if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&1) && !(b->core.flag&2)) reject = 1;

		if (!reject) break;
	}
	return ret;
}
/*
 * Run `func` on every alignment of a BAM file, optionally restricted to a
 * region.
 *
 *   ifn  - input BAM path
 *   ofn  - output BAM path, or NULL for no output file; when given it is
 *          opened with the input's header and handed to `func`
 *   reg  - region string, or NULL to walk the whole file; a region requires
 *          an index for `ifn`
 *   data - opaque pointer forwarded to `func`
 *   func - callback invoked as func(record, in, out, data)
 *
 * Any failure (unopenable file, missing index, unknown reference name, or a
 * final read status other than -1) prints a message to stderr and calls
 * exit(1). On success the final read status (-1) is returned.
 */
int sam_fetch(char *ifn, char *ofn, char *reg, void *data, sam_fetch_f func) {
	int ret = 0;
	samfile_t *in = samopen(ifn, "rb", 0);
	samfile_t *out = 0;

	/* in->header is dereferenced below, so a failed open must stop here. */
	if (in == 0) {
		fprintf(stderr, "[%s:%d] Failed to open \"%s\" for reading.\n", __func__, __LINE__, ifn);
		exit(1);
	}
	if (ofn) {
		out = samopen(ofn, "wb", in->header);
		/* Do not silently run without the output the caller asked for. */
		if (out == 0) {
			fprintf(stderr, "[%s:%d] Failed to open \"%s\" for writing.\n", __func__, __LINE__, ofn);
			exit(1);
		}
	}

	if (reg) {
		bam_index_t *idx = bam_index_load(ifn);
		if (idx == 0) {
			fprintf(stderr, "[%s:%d] Random alignment retrieval only works for indexed BAM files.\n",
					__func__, __LINE__);
			exit(1);
		}

		/* Pre-set so the tid test below is well defined even if the parser
		   bails out before writing its outputs. */
		int tid = -1, beg = 0, end = 0;
		bam_parse_region(in->header, reg, &tid, &beg, &end);
		if (tid < 0) {
			fprintf(stderr, "[%s:%d] Region \"%s\" specifies an unknown reference name. \n",
					__func__, __LINE__, reg);
			exit(1);
		}

		bam_iter_t iter;
		bam1_t *b = bam_init1();
		iter = bam_iter_query(idx, tid, beg, end);
		while ((ret = bam_iter_read(in->x.bam, iter, b)) >= 0) func(b, in, out, data);
		bam_iter_destroy(iter);
		bam_destroy1(b);
		bam_index_destroy(idx);
	} else {
		bam1_t *b = bam_init1();
		while ((ret = samread(in, b)) >= 0) func(b, in, out, data);
		bam_destroy1(b);
	}

	if (out) samclose(out);
	samclose(in);

	if (ret != -1) { /* truncated is -2 */
		fprintf(stderr, "[%s:%d] Alignment retrieval failed due to truncated file\n",
				__func__, __LINE__);
		exit(1);
	}

	return ret;
}
/// Advance the stream to the next BAM record.
///
/// Reads through the region iterator when one is set, otherwise sequentially
/// from the file. On success the record counter is incremented.
///
/// @return true if a record was read into the current-record slot; false if
///         no file is open (state is left untouched) or the read returned a
///         negative status.
bool
bam_streamer::
next()
{
    if (_bfp == NULL) {
        return false;
    }

    const int status = (_biter != NULL)
                       ? bam_iter_read(_bfp->x.bam, _biter, _brec._bp)
                       : samread(_bfp, _brec._bp);

    _is_record_set = (status >= 0);
    if (! _is_record_set) {
        return _is_record_set;
    }

    ++_record_no;
    return _is_record_set;
}
// Read the next bam record from the bam file and store it in pBamInStream->pNewNode static inline int SR_BamInStreamLoadNext(SR_BamInStream* pBamInStream) { if (pBamInStream->bam_cur_status < 0) return -1; // for the bam alignment array, if we need to expand its space // we have to initialize those newly created bam alignment // and update the query name hash since the address of those // bam alignments are changed after expanding pBamInStream->pNewNode = SR_BamNodeAlloc(pBamInStream->pMemPool); if (pBamInStream->pNewNode == NULL) SR_ErrQuit("ERROR: Too many unpaired reads are stored in the memory. Please use smaller bin size or disable searching pair genomically.\n"); int ret; if (pBamInStream->pBamIterator != NULL) ret = bam_iter_read(pBamInStream->fpBamInput, *(pBamInStream->pBamIterator), &(pBamInStream->pNewNode->alignment)); else ret = bam_read1(pBamInStream->fpBamInput, &(pBamInStream->pNewNode->alignment)); pBamInStream->bam_cur_status = ret; return ret; }
int bam_merge_core2(int by_qname, const char *out, const char *headers, int n, char * const *fn, int flag, const char *reg, int level) #endif { bamFile fpout, *fp; heap1_t *heap; bam_header_t *hout = 0; bam_header_t *hheaders = NULL; int i, j, *RG_len = 0; uint64_t idx = 0; char **RG = 0, mode[8]; bam_iter_t *iter = 0; if (headers) { tamFile fpheaders = sam_open(headers); if (fpheaders == 0) { const char *message = strerror(errno); fprintf(stderr, "[bam_merge_core] cannot open '%s': %s\n", headers, message); return -1; } hheaders = sam_header_read(fpheaders); sam_close(fpheaders); } g_is_by_qname = by_qname; fp = (bamFile*)calloc(n, sizeof(bamFile)); heap = (heap1_t*)calloc(n, sizeof(heap1_t)); iter = (bam_iter_t*)calloc(n, sizeof(bam_iter_t)); // prepare RG tag if (flag & MERGE_RG) { RG = (char**)calloc(n, sizeof(void*)); RG_len = (int*)calloc(n, sizeof(int)); for (i = 0; i != n; ++i) { int l = strlen(fn[i]); const char *s = fn[i]; if (l > 4 && strcmp(s + l - 4, ".bam") == 0) l -= 4; for (j = l - 1; j >= 0; --j) if (s[j] == '/') break; ++j; l -= j; RG[i] = calloc(l + 1, 1); RG_len[i] = l; strncpy(RG[i], s + j, l); } } // read the first for (i = 0; i != n; ++i) { bam_header_t *hin; fp[i] = bam_open(fn[i], "r"); if (fp[i] == 0) { int j; fprintf(stderr, "[bam_merge_core] fail to open file %s\n", fn[i]); for (j = 0; j < i; ++j) bam_close(fp[j]); free(fp); free(heap); // FIXME: possible memory leak return -1; } hin = bam_header_read(fp[i]); if (i == 0) { // the first BAM hout = hin; } else { // validate multiple baf int min_n_targets = hout->n_targets; if (hin->n_targets < min_n_targets) min_n_targets = hin->n_targets; for (j = 0; j < min_n_targets; ++j) if (strcmp(hout->target_name[j], hin->target_name[j]) != 0) { fprintf(stderr, "[bam_merge_core] different target sequence name: '%s' != '%s' in file '%s'\n", hout->target_name[j], hin->target_name[j], fn[i]); return -1; } // If this input file has additional target reference sequences, // add them to the headers to be 
output if (hin->n_targets > hout->n_targets) { swap_header_targets(hout, hin); // FIXME Possibly we should also create @SQ text headers // for the newly added reference sequences } bam_header_destroy(hin); } } if (hheaders) { // If the text headers to be swapped in include any @SQ headers, // check that they are consistent with the existing binary list // of reference information. if (hheaders->n_targets > 0) { if (hout->n_targets != hheaders->n_targets) { fprintf(stderr, "[bam_merge_core] number of @SQ headers in '%s' differs from number of target sequences\n", headers); if (!reg) return -1; } for (j = 0; j < hout->n_targets; ++j) if (strcmp(hout->target_name[j], hheaders->target_name[j]) != 0) { fprintf(stderr, "[bam_merge_core] @SQ header '%s' in '%s' differs from target sequence\n", hheaders->target_name[j], headers); if (!reg) return -1; } } swap_header_text(hout, hheaders); bam_header_destroy(hheaders); } if (reg) { int tid, beg, end; if (bam_parse_region(hout, reg, &tid, &beg, &end) < 0) { fprintf(stderr, "[%s] Malformated region string or undefined reference name\n", __func__); return -1; } for (i = 0; i < n; ++i) { bam_index_t *idx; idx = bam_index_load(fn[i]); iter[i] = bam_iter_query(idx, tid, beg, end); bam_index_destroy(idx); } } for (i = 0; i < n; ++i) { heap1_t *h = heap + i; h->i = i; h->b = (bam1_t*)calloc(1, sizeof(bam1_t)); if (bam_iter_read(fp[i], iter[i], h->b) >= 0) { h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam1_strand(h->b); h->idx = idx++; } else h->pos = HEAP_EMPTY; } if (flag & MERGE_UNCOMP) level = 0; else if (flag & MERGE_LEVEL1) level = 1; strcpy(mode, "w"); if (level >= 0) sprintf(mode + 1, "%d", level < 9? level : 9); if ((fpout = strcmp(out, "-")? 
bam_open(out, "w") : bam_dopen(fileno(stdout), "w")) == 0) { fprintf(stderr, "[%s] fail to create the output file.\n", __func__); return -1; } bam_header_write(fpout, hout); bam_header_destroy(hout); #ifndef _PBGZF_USE if (!(flag & MERGE_UNCOMP)) bgzf_mt(fpout, n_threads, 256); #endif ks_heapmake(heap, n, heap); while (heap->pos != HEAP_EMPTY) { bam1_t *b = heap->b; if (flag & MERGE_RG) { uint8_t *rg = bam_aux_get(b, "RG"); if (rg) bam_aux_del(b, rg); bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]); } bam_write1_core(fpout, &b->core, b->data_len, b->data); if ((j = bam_iter_read(fp[heap->i], iter[heap->i], b)) >= 0) { heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam1_strand(b); heap->idx = idx++; } else if (j == -1) { heap->pos = HEAP_EMPTY; free(heap->b->data); free(heap->b); heap->b = 0; } else fprintf(stderr, "[bam_merge_core] '%s' is truncated. Continue anyway.\n", fn[heap->i]); ks_heapadjust(heap, 0, n, heap); } if (flag & MERGE_RG) { for (i = 0; i != n; ++i) free(RG[i]); free(RG); free(RG_len); } for (i = 0; i != n; ++i) { bam_iter_destroy(iter[i]); bam_close(fp[i]); } bam_close(fpout); free(fp); free(heap); free(iter); return 0; }
// multi sample variant caller: CRISP, PICALL or low coverage method int multisampleVC(struct OPTIONS* options,REFLIST* reflist,FILE* fp) { if (USE_DUPLICATES ==1) BAM_FILTER_MASK = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL); else BAM_FILTER_MASK = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP); int bamfiles = options->bamfiles; int last=0; // last is the current position s.t. all reads have starting position > last int i=0; int h=0; unsigned long reads=0; int j=0; int prev_tid = -1; int rf=0; int finishedfiles =0; struct alignedread* pread = NULL; struct BAMFILE_data* bamfiles_data = calloc(bamfiles,sizeof(struct BAMFILE_data)); // added one extra to list to store indels for all samples combined READQUEUE* RQ = (READQUEUE*)malloc(sizeof(READQUEUE)); RQ->first = NULL; RQ->last = NULL; RQ->reads = 0; int* fcigarlist = (int*)malloc(sizeof(int)*4096); // data structure for holding potential variants and read counts, etc struct VARIANT variant; variant.ploidy = calloc(options->bamfiles,sizeof(int)); init_poolsizes(&variant,options,PICALL); init_variant(&variant,options->bamfiles,options->bamfiles); variant.options = options; // pointer to options BAMHEAP bheap; bheap.harray = (int*)malloc(sizeof(int)*bamfiles); bheap.length = bamfiles; for (i=0;i<bamfiles;i++) { bheap.harray[i] = i; bamfiles_data[i].finished= 0;} reflist->cinterval = -1; // first interval to the right of current base init_bamfiles(bamfiles_data,options->bamfilelist,bamfiles,options->regions,&options->targettid,&options->targetstart,&options->targetend); // error when reading indexed bam files probably due to lack of reads in some files resulting in heap error, fixed oct 17 2012 j=0; for (i=0;i<bamfiles;i++) { finishedfiles += bamfiles_data[i].finished; if (bamfiles_data[i].finished ==0) bheap.harray[j++] = i; else bheap.length--; } buildminheap(&bheap,bamfiles_data); // initial minheap call //fprintf(stderr,"finishedfiles %d \n",finishedfiles); if (INDEL_REALIGNMENT >=1) 
allocate_mem_heap(bamfiles_data,bamfiles,100); HAPLOTYPES =0,MIN_COVERAGE_FLANKING =0; for (i=0;i<variant.samples;i++) { MIN_COVERAGE_FLANKING += 2*variant.ploidy[i]; // enforced for regions outside the bedfile target HAPLOTYPES += variant.ploidy[i]; } //int min_coverage_target = 1*variant->ploidy*variant->samples; // enforced for regions outside the bedfile target int offset_readlength = 150; // call variants in window (last,current_read_position-offset_readlength) to allow for indel analysis, set to 0 for original behavior of program // the value of offset should not affect the correctness or speed of the code int current_position =0; while (finishedfiles < bamfiles) { i = bheap.harray[0]; // take the top read off the heap if ( !(bamfiles_data[i].read->flag & BAM_FILTER_MASK)) { if (bamfiles_data[i].read->tid != prev_tid) // read's chromosome is different from previousread { if (prev_tid >=0) // finish the processing of previous chromosome and cleanup { if (RQ->reads >0) { fprintf(stderr,"processing %d reads left in queue for chrom %s...",RQ->reads,reflist->names[prev_tid]); callvariants(reflist,prev_tid,last,reflist->lengths[prev_tid],RQ,bamfiles_data,options,&variant); empty_queue(RQ,bamfiles_data); //clean thequeue } if (INDEL_REALIGNMENT >=1) clean_indel_lists(bamfiles_data,bamfiles,-1); current_position = 0; for(j=0;j<bamfiles;j++) bamfiles_data[j].last=NULL; last =0; free(reflist->sequences[prev_tid]); fprintf(stderr,".....finished processing reads for chrom %s\n",reflist->names[prev_tid]); fprintf(stdout,".....finished processing reads for chrom %s\n",reflist->names[prev_tid]); reflist->cinterval = -1; // reset to -1 } read_chromosome(reflist,bamfiles_data[i].read->tid,fp); prev_tid =bamfiles_data[i].read->tid; } if (bamfiles_data[i].read->position <last) { fprintf(stderr,"reads out of order i:%d h:%d pos: %d %d\n",i,h,bamfiles_data[i].read->position,last); fprintf(stderr,"the program will now exit, please sort the bamfiles\n"); return 1; } if 
(INDEL_REALIGNMENT >=1 && bamfiles_data[i].read->position > current_position+offset_readlength) { // need to clean up indel lists when we encounter a new chromosome... print_indel_lists(bamfiles_data,bamfiles,current_position+offset_readlength); clean_indel_lists(bamfiles_data,bamfiles,current_position); current_position = bamfiles_data[i].read->position; } // realign reads before calling variants, each read is realigned only once // small bug here, only call variants when last is less than current read position // bug fixed here, update last only when 'callvariants' is invoked, ??? if (RQ->reads > 0 && bamfiles_data[i].read->position > last+offset_readlength) { callvariants(reflist,bamfiles_data[i].read->tid,last,bamfiles_data[i].read->position-offset_readlength,RQ,bamfiles_data,options,&variant); } last = bamfiles_data[i].read->position-offset_readlength; if (last < 0) last =0; bamfiles_data[i].read->cflag = 0; // this function should only be called on reads inside/close_to targeted regions.. 
parse_cigar(bamfiles_data[i].read,reflist,bamfiles_data[i].read->tid,fcigarlist); if (INDEL_REALIGNMENT >=1 && bamfiles_data[i].read->gaps > 0 && bamfiles_data[i].read->mquality >= 20) extract_indel_reads(bamfiles_data[i].read,reflist,bamfiles_data[i].read->tid,i,bamfiles_data[i].ilist); //fprintf(stdout,"read s:%d IS:%d %s %d \n",i,bamfiles_data[i].read->IS,bamfiles_data[i].read->readid,bamfiles_data[i].read->position); if (RQ->last == NULL) { RQ->last = bamfiles_data[i].read; RQ->first = RQ->last; (RQ->last)->next = NULL; RQ->reads++; } else { (RQ->last)->next = bamfiles_data[i].read; RQ->last = bamfiles_data[i].read; (RQ->last)->next = NULL; RQ->reads++; } if (bamfiles_data[i].last ==NULL) bamfiles_data[i].first = RQ->last; else bamfiles_data[i].last->nextread= RQ->last; bamfiles_data[i].last = RQ->last; (RQ->last)->nextread =NULL; // read that passes filters from 'i'th bam file is inserted in queue, should also add it to OPE queue //if (bamfiles_data[i].read->position < bamfiles_data[i].read->mateposition && bamfiles_data[i].read->lastpos > bamfiles_data[i].read->mateposition) //fprintf(stdout,"B %d %s %d %d %d \n",i,bamfiles_data[i].read->readid,bamfiles_data[i].read->position,bamfiles_data[i].read->mateposition,bamfiles_data[i].read->IS); } else free_read(bamfiles_data[i].read); //fprintf(stdout,"read from %d %d %s\n",i,bamfiles_data[i].read->position,bamfiles_data[i].read->readid); if (options->regions ==NULL) rf =samread(bamfiles_data[i].fp,bamfiles_data[i].b); else rf = bam_iter_read(bamfiles_data[i].fp->x.bam,bamfiles_data[i].iter,bamfiles_data[i].b); if (rf >=0) { bamfiles_data[i].read = get_read_bamfile(bamfiles_data[i].b,bamfiles_data[i].fp,pread); //if (options->samples ==0) bamfiles_data[i].read->sampleid = i; //else bamfiles_data[i].read->sampleid = options->BAM_TO_SAMPLE[i]; // bug here june 30 2013 commented out .... 
in 12 T2D pools bamfiles_data[i].read->sampleid = i; if (!(bamfiles_data[i].read->flag & BAM_FILTER_MASK)) minHeapify(&bheap,0,bamfiles_data); } else // no more reads in file 'i' { bamfiles_data[i].finished = 1; bamfiles_data[i].read= NULL; bam_destroy1(bamfiles_data[i].b); h++; finishedfiles++; //fprintf(stderr,"finished reading bam file %s \n",options->bamfilelist[i]); //return 1; bheap.harray[0] = bheap.harray[bheap.length-1]; bheap.length--; if (bheap.length > 0) minHeapify(&bheap,0,bamfiles_data); // call minheapify like function to push sample i off the heap, reduce heap size } if ((++reads)%1000000 ==0 && RQ->reads >0) fprintf(stderr,".....processed %ld reads QSIZE:%d %s:%d:%d variants called %d\n",reads,RQ->reads,RQ->first->chrom,RQ->first->position,RQ->first->lastpos,VARIANTS_CALLED); } if (prev_tid >=0) // finish the processing of last chromosome { if (RQ->reads >0) { fprintf(stderr,"processing %d reads left in queue for chrom %s.....",RQ->reads,reflist->names[prev_tid]); if (reflist->lengths[prev_tid] > last) callvariants(reflist,prev_tid,last,reflist->lengths[prev_tid],RQ,bamfiles_data,options,&variant); empty_queue(RQ,bamfiles_data); //clean thequeue } else fprintf(stderr,"queue for chrom %s is empty ",reflist->names[prev_tid]); free(reflist->sequences[prev_tid]); fprintf(stderr,"finished processing reads for chrom %s \n\n",reflist->names[prev_tid]); if (INDEL_REALIGNMENT >=1) { print_indel_lists(bamfiles_data,bamfiles,reflist->lengths[prev_tid]); clean_indel_lists(bamfiles_data,bamfiles,reflist->lengths[prev_tid]); } } fprintf(stderr,"CRISP has finished processing bam files: total reads processed %ld total variants called %d \n\n",reads,VARIANTS_CALLED); //for (i=0;i<bamfiles;i++) bam_destroy1(bamfiles_data[i].b); free(bamfiles_data); free(bheap.harray); free(fcigarlist); //empty_queue(RQ); //clean thequeue //fprintf(stdout,"FILE %d %s %d %s %d %d %d mapped %d \n",i,read->readid,read->flag,read->chrom,read->position,read->mquality,read->IS,(read->flag 
&4)); return 1; }
// jump to a certain chromosome in a bam file SR_Status SR_BamInStreamJump(SR_BamInStream* pBamInStream, int32_t refID, int32_t begin, int32_t end) { // if we do not have the index file return error if (pBamInStream->pBamIndex == NULL) return SR_ERR; if (pBamInStream->pBamIterator != NULL) { bam_iter_destroy(*(pBamInStream->pBamIterator)); free(pBamInStream->pBamIterator); pBamInStream->pBamIterator = NULL; } // clear the bam array before jump SR_BamInStreamReset(pBamInStream); pBamInStream->pBamIterator = (bam_iter_t*) malloc(sizeof(bam_iter_t)); // jump and read the first alignment in the given chromosome int ret; //bam_iter_t pBamIter = bam_iter_query(pBamInStream->pBamIndex, refID, begin, end); *pBamInStream->pBamIterator = bam_iter_query(pBamInStream->pBamIndex, refID, begin, end); pBamInStream->pNewNode = SR_BamNodeAlloc(pBamInStream->pMemPool); ret = bam_iter_read(pBamInStream->fpBamInput, *(pBamInStream->pBamIterator), &(pBamInStream->pNewNode->alignment)); //bam_iter_destroy(pBamIter); khash_t(queryName)* pNameHashCurr = NULL; // see if we jump to the desired chromosome if (ret > 0 && pBamInStream->pNewNode->alignment.core.tid == refID) { // exclude those reads who are non-paired-end, qc-fail, duplicate-marked, proper-paired, // both aligned, secondary-alignment and no-name-specified. 
if ((pBamInStream->pNewNode->alignment.core.flag & SR_BAM_FMASK) != 0 || strcmp(bam1_qname(&(pBamInStream->pNewNode->alignment)), "*") == 0) { SR_BamNodeFree(pBamInStream->pNewNode, pBamInStream->pMemPool); pBamInStream->pNewNode = NULL; pBamInStream->currBinPos = NO_QUERY_YET; } else { SR_BamListPushHead(&(pBamInStream->pAlgnLists[CURR_BIN]), pBamInStream->pNewNode); int khRet = 0; khiter_t khIter = kh_put(queryName, pBamInStream->pNameHashes[CURR_BIN], bam1_qname(&(pBamInStream->pNewNode->alignment)), &khRet); if (khRet != 0) { pNameHashCurr = pBamInStream->pNameHashes[CURR_BIN]; kh_value(pNameHashCurr, khIter) = pBamInStream->pNewNode; } else return SR_ERR; pBamInStream->currBinPos = pBamInStream->pNewNode->alignment.core.pos; pBamInStream->pNewNode = NULL; } pBamInStream->currRefID = refID; return SR_OK; } else if (ret == -1) { return SR_OUT_OF_RANGE; } else { return SR_ERR; } }