Exemplo n.º 1
0
/* Writes the first `count` entries of `values` to stdout, each followed by one space. */
static void dump_int_array(const int *values, int count)
{
    int pos = 0;
    while (pos < count)
    {
        printf("%d ", values[pos]);
        pos++;
    }
}

/*
 * Demo driver for the heap routines: runs buildminheap, buildmaxheap and
 * ascheapsort over a fixed 9-element array and prints the array contents
 * after each stage.
 */
int main()
{
    int a[] = {9,81,17,69,50,4,3,2,1};
    const int len = (int)(sizeof a / sizeof a[0]);   /* 9 elements */

    buildminheap(a, len);
    dump_int_array(a, len);
    printf("\n");

    /* The descending heap sort step (desheapsort) is disabled, so this
       line repeats the contents printed above. */
    dump_int_array(a, len);
    printf("\n");

    buildmaxheap(a, len);
    dump_int_array(a, len);
    printf("\n");

    ascheapsort(a, len);
    dump_int_array(a, len);   /* no trailing newline after the final dump */

    return 0;
}
Exemplo n.º 2
0
/*
 * Multi sample variant caller driver: CRISP, PICALL or low coverage method.
 *
 * Walks the reads of all bam files in `options` through a heap of file
 * indices (bheap), appends every read that passes BAM_FILTER_MASK to a shared
 * read queue and calls callvariants() on the window that lies more than
 * `offset_readlength` bases behind the read currently being inserted.
 * When the chromosome (tid) of the incoming read changes, the queue of the
 * previous chromosome is processed and emptied and its reference sequence is
 * freed before the next chromosome is loaded with read_chromosome().
 *
 * options : run configuration; bamfiles, bamfilelist, regions, targettid,
 *           targetstart and targetend are read here, and the pointer is
 *           stored in variant.options.
 * reflist : reference list; cinterval is reset to -1, names/lengths are read,
 *           and sequences[tid] is freed once a chromosome is finished.
 * fp      : handed unchanged to read_chromosome() (presumably the reference
 *           sequence file - confirm against read_chromosome).
 *
 * Returns 1 in every case visible here: on normal completion, when reads are
 * found out of order, and when the working buffers cannot be allocated.
 */
int multisampleVC(struct OPTIONS* options,REFLIST* reflist,FILE* fp)
{
	// duplicate-marked reads are filtered unless USE_DUPLICATES is 1
	if (USE_DUPLICATES ==1) BAM_FILTER_MASK = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL);
	else BAM_FILTER_MASK = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP);

	int bamfiles = options->bamfiles;

	int last=0; // last is the current position s.t. all reads have starting position > last
	int i=0; int h=0;
	unsigned long reads=0; int j=0; int prev_tid = -1;   int rf=0;
	int finishedfiles =0;
	struct alignedread* pread = NULL;

	// NOTE(review): an older comment here spoke of "one extra" entry for indels of all
	// samples combined, but exactly `bamfiles` entries are allocated - confirm which is intended
	struct BAMFILE_data* bamfiles_data = calloc(bamfiles,sizeof(struct BAMFILE_data));
	READQUEUE* RQ = (READQUEUE*)malloc(sizeof(READQUEUE));
	int* fcigarlist = (int*)malloc(sizeof(int)*4096);

	// data structure for holding potential variants and read counts, etc
	struct VARIANT variant;  variant.ploidy = calloc(options->bamfiles,sizeof(int));

	// heap of bam file indices, harray[0] is the file whose read is handled next
	BAMHEAP bheap; bheap.harray = (int*)malloc(sizeof(int)*bamfiles); bheap.length = bamfiles;

	// All buffers are checked before first use. The per-file arrays are only
	// required when bamfiles > 0, since a zero-sized request may legally return NULL.
	if (RQ == NULL || fcigarlist == NULL ||
	    (bamfiles > 0 && (bamfiles_data == NULL || variant.ploidy == NULL || bheap.harray == NULL)))
	{
		fprintf(stderr,"unable to allocate memory for %d bam files\n",bamfiles);
		free(bamfiles_data); free(RQ); free(fcigarlist); free(variant.ploidy); free(bheap.harray);
		return 1;
	}
	RQ->first = NULL; RQ->last = NULL; RQ->reads = 0;

	init_poolsizes(&variant,options,PICALL);
	init_variant(&variant,options->bamfiles,options->bamfiles);
	variant.options = options;  // pointer to options

	for (i=0;i<bamfiles;i++) { bheap.harray[i] = i; bamfiles_data[i].finished= 0;}

	reflist->cinterval = -1; // first interval to the right of current base

	init_bamfiles(bamfiles_data,options->bamfilelist,bamfiles,options->regions,&options->targettid,&options->targetstart,&options->targetend);

	// error when reading indexed bam files probably due to lack of reads in some files resulting in heap error, fixed oct 17 2012
	// only files that are not already finished after init_bamfiles are kept in the heap
	j=0;
	for (i=0;i<bamfiles;i++)
	{
		finishedfiles += bamfiles_data[i].finished;
		if (bamfiles_data[i].finished ==0) bheap.harray[j++] = i; else bheap.length--;
	}
	buildminheap(&bheap,bamfiles_data); // initial minheap call

	if (INDEL_REALIGNMENT >=1) allocate_mem_heap(bamfiles_data,bamfiles,100);

	// totals over all samples, derived from the per-sample ploidy
	HAPLOTYPES =0; MIN_COVERAGE_FLANKING =0;
	for (i=0;i<variant.samples;i++)
	{
		MIN_COVERAGE_FLANKING += 2*variant.ploidy[i];  // enforced for regions outside the bedfile target
		HAPLOTYPES += variant.ploidy[i];
	}
	int offset_readlength = 150;  // call variants in window (last,current_read_position-offset_readlength) to allow for indel analysis, set to 0 for original behavior of program
	// the value of offset should not affect the correctness or speed of the code
	int current_position =0;

	while (finishedfiles < bamfiles)
	{
		i = bheap.harray[0]; // take the top read off the heap
		if ( !(bamfiles_data[i].read->flag & BAM_FILTER_MASK))
		{
			if (bamfiles_data[i].read->tid != prev_tid) // read's chromosome is different from previousread
			{
				if (prev_tid >=0)  // finish the processing of previous chromosome and cleanup
				{
					if (RQ->reads >0)
					{
						fprintf(stderr,"processing %d reads left in queue for chrom %s...",RQ->reads,reflist->names[prev_tid]);
						callvariants(reflist,prev_tid,last,reflist->lengths[prev_tid],RQ,bamfiles_data,options,&variant);
						empty_queue(RQ,bamfiles_data); //clean thequeue
					}
					if (INDEL_REALIGNMENT >=1) clean_indel_lists(bamfiles_data,bamfiles,-1);
					// the resets below are unconditional (they do not depend on INDEL_REALIGNMENT)
					current_position = 0;
					for(j=0;j<bamfiles;j++) bamfiles_data[j].last=NULL;
					last =0;
					free(reflist->sequences[prev_tid]);
					fprintf(stderr,".....finished processing reads for chrom %s\n",reflist->names[prev_tid]);
					fprintf(stdout,".....finished processing reads for chrom %s\n",reflist->names[prev_tid]);
					reflist->cinterval = -1; // reset to -1
				}
				read_chromosome(reflist,bamfiles_data[i].read->tid,fp);
				prev_tid =bamfiles_data[i].read->tid;
			}

			if (bamfiles_data[i].read->position <last)
			{
				fprintf(stderr,"reads out of order i:%d h:%d pos: %d %d\n",i,h,bamfiles_data[i].read->position,last);
				fprintf(stderr,"the program will now exit, please sort the bamfiles\n");
				// NOTE(review): nothing is released on this path; the message implies the caller exits
				return 1;
			}

			if (INDEL_REALIGNMENT >=1 && bamfiles_data[i].read->position > current_position+offset_readlength)
			{
				// need to clean up indel lists when we encounter a new chromosome...
				print_indel_lists(bamfiles_data,bamfiles,current_position+offset_readlength);
				clean_indel_lists(bamfiles_data,bamfiles,current_position);
				current_position = bamfiles_data[i].read->position;
			}
			// realign reads before calling variants, each read is realigned only once

			// variants are called only when the incoming read starts more than offset_readlength past 'last'
			if (RQ->reads > 0 && bamfiles_data[i].read->position > last+offset_readlength)
			{
				callvariants(reflist,bamfiles_data[i].read->tid,last,bamfiles_data[i].read->position-offset_readlength,RQ,bamfiles_data,options,&variant);
			}
			// NOTE(review): 'last' is updated for every accepted read, although an older comment
			// suggested updating it only when callvariants is invoked - confirm the intent
			last = bamfiles_data[i].read->position-offset_readlength;
			if (last < 0) last =0;

			bamfiles_data[i].read->cflag = 0;
			// this function should only be called on reads inside/close_to targeted regions..
			parse_cigar(bamfiles_data[i].read,reflist,bamfiles_data[i].read->tid,fcigarlist);

			if (INDEL_REALIGNMENT >=1 && bamfiles_data[i].read->gaps > 0 && bamfiles_data[i].read->mquality >= 20) extract_indel_reads(bamfiles_data[i].read,reflist,bamfiles_data[i].read->tid,i,bamfiles_data[i].ilist);

			// append the read to the tail of the shared queue (linked through ->next)
			if (RQ->last == NULL)
			{
				RQ->last = bamfiles_data[i].read; RQ->first = RQ->last; (RQ->last)->next = NULL;
				RQ->reads++;
			}
			else
			{
				(RQ->last)->next = bamfiles_data[i].read; RQ->last = bamfiles_data[i].read;
				(RQ->last)->next = NULL;
				RQ->reads++;
			}
			// and to the tail of the per-file list of file 'i' (linked through ->nextread)
			if (bamfiles_data[i].last ==NULL) bamfiles_data[i].first = RQ->last;
			else bamfiles_data[i].last->nextread= RQ->last;
			bamfiles_data[i].last = RQ->last; (RQ->last)->nextread =NULL;
			// read that passes filters from 'i'th bam file is inserted in queue, should also add it to OPE queue
		}
		else free_read(bamfiles_data[i].read); // filtered reads are discarded

		// fetch the next read of file 'i': plain read without regions, iterator read with regions
		if (options->regions ==NULL) rf =samread(bamfiles_data[i].fp,bamfiles_data[i].b);
		else rf  = bam_iter_read(bamfiles_data[i].fp->x.bam,bamfiles_data[i].iter,bamfiles_data[i].b);
		if (rf >=0)
		{
			bamfiles_data[i].read = get_read_bamfile(bamfiles_data[i].b,bamfiles_data[i].fp,pread);
			// the sample id is the bam file index; the BAM_TO_SAMPLE mapping was disabled
			// (original note: bug here june 30 2013 ... in 12 T2D pools)
			bamfiles_data[i].read->sampleid = i;
			// the heap is only reordered for reads that pass the filter; a filtered read
			// leaves file 'i' at the top, so it is discarded and replaced on the next iteration
			if (!(bamfiles_data[i].read->flag & BAM_FILTER_MASK)) minHeapify(&bheap,0,bamfiles_data);
		}
		else // no more reads in file 'i'
		{
			bamfiles_data[i].finished = 1; bamfiles_data[i].read= NULL;
			bam_destroy1(bamfiles_data[i].b);
			h++; finishedfiles++;
			// move the last heap entry to the top and shrink the heap to push file 'i' off it
			bheap.harray[0] = bheap.harray[bheap.length-1]; bheap.length--;
			if (bheap.length > 0) minHeapify(&bheap,0,bamfiles_data);
		}
		// progress message; 'reads' is unsigned long, hence %lu
		if ((++reads)%1000000 ==0 && RQ->reads >0) fprintf(stderr,".....processed %lu reads QSIZE:%d %s:%d:%d variants called %d\n",reads,RQ->reads,RQ->first->chrom,RQ->first->position,RQ->first->lastpos,VARIANTS_CALLED);
	}

	if (prev_tid >=0)  // finish the processing of last chromosome
	{
		if (RQ->reads >0)
		{
			fprintf(stderr,"processing %d reads left in queue for chrom %s.....",RQ->reads,reflist->names[prev_tid]);
			if (reflist->lengths[prev_tid] > last) callvariants(reflist,prev_tid,last,reflist->lengths[prev_tid],RQ,bamfiles_data,options,&variant);
			empty_queue(RQ,bamfiles_data); //clean thequeue
		}
		else fprintf(stderr,"queue for chrom %s is empty ",reflist->names[prev_tid]);
		free(reflist->sequences[prev_tid]);
		fprintf(stderr,"finished processing reads for chrom %s \n\n",reflist->names[prev_tid]);
		if (INDEL_REALIGNMENT >=1)
		{
			print_indel_lists(bamfiles_data,bamfiles,reflist->lengths[prev_tid]);
			clean_indel_lists(bamfiles_data,bamfiles,reflist->lengths[prev_tid]);
		}
	}
	fprintf(stderr,"CRISP has finished processing bam files: total reads processed %lu total variants called %d \n\n",reads,VARIANTS_CALLED);

	// the queue has been emptied above whenever it held reads, so only the header remains to be released
	// NOTE(review): variant.ploidy and anything allocated by init_variant are not released here -
	// confirm whether a matching cleanup routine exists
	free(bamfiles_data); free(bheap.harray); free(fcigarlist); free(RQ);
	return 1;
}