예제 #1
0
파일: parse.c 프로젝트: rioki/d16
/*
 * Advance the parser by one token.
 *
 * The look-ahead pair (next_token / next_value) becomes the current pair
 * (token / value), and a new look-ahead is fetched from the scanner.
 */
void read_token()
{
    /* drop the old current value before its storage is overwritten */
    clear_variant(&value);

    /* promote the look-ahead; value takes over next_value's contents as-is */
    value = next_value;
    token = next_token;

    /* re-initialise the look-ahead slot, then let the scanner refill it */
    init_variant(&next_value);
    next_token = scan_token(&next_value);
}
예제 #2
0
파일: parse.c 프로젝트: rioki/d16
/*
 * Parse one source into the program `p`.
 *
 * file  path of the input to read, or NULL to read from stdin.
 * p     program to fill in; must not be NULL. p->file receives a strdup'ed
 *       copy of the input name ("<stdin>" when file is NULL).
 *
 * Returns -1 when the name cannot be duplicated or the file cannot be opened,
 * the negative value reported by start_scan() when scanning cannot start, and
 * otherwise the module-level `result` as left by read_program().
 */
int parse(const char* file, program_t* p)
{
    int r = 0;
    FILE* fp = NULL;
    /* name used for diagnostics and stored in the program */
    const char* name = (file != NULL) ? file : "<stdin>";

    /* check before the first dereference of the program pointer */
    assert(p != NULL);
    prog = p;

    /* never hand NULL to strdup: the stdin case stores "<stdin>" instead */
    prog->file = strdup(name);
    if (prog->file == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        return -1;
    }

    if (file != NULL)
    {
        fp = fopen(file, "r");
        if (fp == NULL)
        {
            fprintf(stderr, "Failed to open %s\n", file);
            return -1;
        }
    }
    else
    {
        fp = stdin;
    }

    /* NOTE(review): fp is never closed in this function; confirm whether the
       scanner takes ownership of the stream. */
    r = start_scan(fp, name);
    if (r < 0)
    {
        return r;
    }

    /* prime the current value and the one-token look-ahead used by read_token() */
    init_variant(&value);
    init_variant(&next_value);
    next_token = scan_token(&next_value);

    read_program();

    return result;
}
예제 #3
0
파일: parse.c 프로젝트: rioki/d16
void read_argument(arg_t* arg)
{
    assert(arg != NULL);
    
    int as_value = 0;
    unsigned int t1 = 0;
    variant_t v1;
    unsigned int t2 = 0;
    
    init_variant(&v1);
    
    read_token();
    
    if (token == LEFT_BRAKET_TOKEN)
    {
        as_value = 1;
        read_token();
    }
    else
    {
        as_value = 0;
    }
    
    if (is_value_token(token))
    {
        t1 = token;
        copy_variant(&v1, &value);
    }
    else
    {
        fprintf(stderr, "%s:%d: error: Expected identifier or integer but got %s.\n", get_scan_file(), get_scan_line(), get_token_name(token));
        result = -1;
        return;
    }
    
    if (next_token == PLUS_TOKEN)
    {
        read_token();
        assert(token == PLUS_TOKEN);
        
        read_token();
        
        if (is_value_token(token))
        {
            t2 = token;
        }
        else
        {
            fprintf(stderr, "%s:%d: error: Expected identifier or integer but got %s.\n", get_scan_file(), get_scan_line(), get_token_name(token));
            result = -1;
            return;
        }
    }
        
    if (as_value == 1)
    {
        read_token();
        if (token != RIGHT_BRAKET_TOKEN)
        {
            fprintf(stderr, "%s:%d: error: Missmatched braket.\n", get_scan_file(), get_scan_line());
            result = -1;
            return;
        }
    }
    /*
    0x00-0x07: register (A, B, C, X, Y, Z, I or J, in that order)
    0x08-0x0f: [register]
    0x10-0x17: [next word + register]
         0x18: POP / [SP++]
         0x19: PEEK / [SP]
         0x1a: PUSH / [--SP]
         0x1b: SP
         0x1c: PC
         0x1d: O
         0x1e: [next word]
         0x1f: next word (literal)
    0x20-0x3f: literal value 0x00-0x1f (literal)
    */
    
    if (is_symbolic(t1))
    {
        switch (t1)
        {
            case A_TOKEN:
                if (as_value)
                    arg->value = 0x08;
                else
                    arg->value = 0x00;
                break;
            case B_TOKEN:
                if (as_value)
                    arg->value = 0x09;
                else
                    arg->value = 0x01;
                break;
            case C_TOKEN:
                if (as_value)
                    arg->value = 0x0A;
                else
                    arg->value = 0x02;
                break;
            case X_TOKEN:
                if (as_value)
                    arg->value = 0x0B;
                else
                    arg->value = 0x03;
                break;
            case Y_TOKEN:
                if (as_value)
                    arg->value = 0x0C;
                else
                    arg->value = 0x04;
                break;
            case Z_TOKEN:
                if (as_value)
                    arg->value = 0x0D;
                else
                    arg->value = 0x05;
                break;
            case I_TOKEN:
                if (as_value)
                    arg->value = 0x0E;
                else
                    arg->value = 0x06;
                break;    
            case J_TOKEN:
                if (as_value)
                    arg->value = 0x0F;
                else
                    arg->value = 0x07;
                break;
            case POP_TOKEN:
                arg->value = 0x18;
                break;
            case PEEK_TOKEN:
                arg->value = 0x19;
                break;
            case PUSH_TOKEN:
                arg->value = 0x1A;
                break;
            case SP_TOKEN:
                arg->value = 0x1B;
                break;
            case PC_TOKEN:
                arg->value = 0x1C;
                break;
            case O_TOKEN:
                arg->value = 0x1D;
                break;
        }
        /* TODO validate that there is no offset */
    }
    else if (t1 == IDENTIFIER_TOKEN)
    {
        // for jumps we encode alwas next word
        // this is since we can't resolve the target at this point
        arg->value = 0x1F;
        copy_variant(&arg->extra, &v1);
        
        /* TODO validate that there is no offset */
    }
    else
    {
        assert(t1 == INTEGER_TOKEN);
        assert(v1.type == UINT_TYPE);
        
        if (as_value && t2 == 0)
        {
            arg->value = 0x1E;
            copy_variant(&arg->extra, &v1);
        }
        if (!as_value && t2 == 0)
        {
            if (v1.ui <= 0x1F)
            {
                arg->value = 0x20 + v1.ui;
            }
            else
            {
                arg->value =  0x1F;
                copy_variant(&arg->extra, &v1);
            }            
        }
        if (t2 != 0)
        {
            assert(as_value == 1);
            switch (t2)
            {
                case A_TOKEN:
                    arg->value = 0x10;
                    break;
                case B_TOKEN:
                    arg->value = 0x11;
                    break;
                case C_TOKEN:
                    arg->value = 0x12;
                    break;
                case X_TOKEN:
                    arg->value = 0x13;
                    break;
                case Y_TOKEN:
                    arg->value = 0x14;
                    break;
                case Z_TOKEN:
                    arg->value = 0x15;
                    break;
                case I_TOKEN:
                    arg->value = 0x16;
                    break;
                case J_TOKEN:
                    arg->value = 0x17;
                    break;      
                default:
                    assert(0 && "should never happen");
                    break;
            }
            copy_variant(&arg->extra, &v1);
        }
    }    
}
예제 #4
0
// multi sample variant caller: CRISP, PICALL or low coverage method
/*
 * Streams the reads of all options->bamfiles BAM files, always taking the read
 * of the file at the root of a min-heap of file indices. Reads that pass
 * BAM_FILTER_MASK are appended to a shared queue (RQ) and to the per-file read
 * list; callvariants() is invoked on the window that trails the current read
 * by offset_readlength bases.
 *
 * options  run configuration (number of BAM files, file list, regions, target);
 *          also stored in the local variant structure.
 * reflist  reference data; a chromosome's sequence is loaded through
 *          read_chromosome() when the first read on it is seen and freed once
 *          that chromosome is finished.
 * fp       passed through to read_chromosome().
 *
 * Returns 1 in every case, including the "reads out of order" abort.
 */
int multisampleVC(struct OPTIONS* options,REFLIST* reflist,FILE* fp)
{
	/* reads flagged as duplicates are filtered unless USE_DUPLICATES is 1 */
	if (USE_DUPLICATES ==1) BAM_FILTER_MASK = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL);
	else BAM_FILTER_MASK = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP);

	int bamfiles = options->bamfiles;

	int last=0; // last is the current position s.t. all reads have starting position > last
	int i=0; int h=0;
	unsigned long reads=0; int j=0; int prev_tid = -1;   int rf=0;
	int finishedfiles =0;
	struct alignedread* pread = NULL;
	struct BAMFILE_data* bamfiles_data = calloc(bamfiles,sizeof(struct BAMFILE_data));

	READQUEUE* RQ = (READQUEUE*)malloc(sizeof(READQUEUE));  RQ->first = NULL; RQ->last = NULL; RQ->reads = 0;
	int* fcigarlist = (int*)malloc(sizeof(int)*4096);

	// data structure for holding potential variants and read counts, etc
	// NOTE(review): variant.ploidy and whatever init_variant allocates are not released in this function
	struct VARIANT variant;  variant.ploidy = calloc(options->bamfiles,sizeof(int));
	init_poolsizes(&variant,options,PICALL);
	init_variant(&variant,options->bamfiles,options->bamfiles);
	variant.options = options;  // pointer to options

	// heap of file indices, one entry per BAM file
	BAMHEAP bheap; bheap.harray = (int*)malloc(sizeof(int)*bamfiles); bheap.length = bamfiles;
	for (i=0;i<bamfiles;i++) { bheap.harray[i] = i; bamfiles_data[i].finished= 0;}

	reflist->cinterval = -1; // first interval to the right of current base

	init_bamfiles(bamfiles_data,options->bamfilelist,bamfiles,options->regions,&options->targettid,&options->targetstart,&options->targetend);

	// error when reading indexed bam files probably due to lack of reads in some files resulting in heap error, fixed oct 17 2012
	// files already marked finished by init_bamfiles are left out of the heap
	j=0;
	for (i=0;i<bamfiles;i++)
	{
		finishedfiles += bamfiles_data[i].finished;
		if (bamfiles_data[i].finished ==0) bheap.harray[j++] = i; else bheap.length--;
	}
	buildminheap(&bheap,bamfiles_data); // initial minheap call

	if (INDEL_REALIGNMENT >=1) allocate_mem_heap(bamfiles_data,bamfiles,100);

	HAPLOTYPES =0;
	MIN_COVERAGE_FLANKING =0;
	for (i=0;i<variant.samples;i++)
	{
		MIN_COVERAGE_FLANKING += 2*variant.ploidy[i];  // enforced for regions outside the bedfile target
		HAPLOTYPES += variant.ploidy[i];
	}
	int offset_readlength = 150;  // call variants in window (last,current_read_position-offset_readlength) to allow for indel analysis, set to 0 for original behavior of program
	// the value of offset should not affect the correctness or speed of the code
	int current_position =0;

	while (finishedfiles < bamfiles)
	{
		i = bheap.harray[0]; // take the top read off the heap
		if ( !(bamfiles_data[i].read->flag & BAM_FILTER_MASK))
		{
			if (bamfiles_data[i].read->tid != prev_tid) // read's chromosome is different from previous read
			{
				if (prev_tid >=0)  // finish the processing of previous chromosome and cleanup
				{
					if (RQ->reads >0)
					{
						fprintf(stderr,"processing %d reads left in queue for chrom %s...",RQ->reads,reflist->names[prev_tid]);
						callvariants(reflist,prev_tid,last,reflist->lengths[prev_tid],RQ,bamfiles_data,options,&variant);
						empty_queue(RQ,bamfiles_data); // clean the queue
					}
					if (INDEL_REALIGNMENT >=1) clean_indel_lists(bamfiles_data,bamfiles,-1);
					// the resets below happen regardless of INDEL_REALIGNMENT
					current_position = 0;
					for (j=0;j<bamfiles;j++) bamfiles_data[j].last=NULL;
					last =0;
					free(reflist->sequences[prev_tid]);
					fprintf(stderr,".....finished processing reads for chrom %s\n",reflist->names[prev_tid]);
					fprintf(stdout,".....finished processing reads for chrom %s\n",reflist->names[prev_tid]);
					reflist->cinterval = -1; // reset to -1
				}
				read_chromosome(reflist,bamfiles_data[i].read->tid,fp);
				prev_tid =bamfiles_data[i].read->tid;
			}

			if (bamfiles_data[i].read->position <last)
			{
				fprintf(stderr,"reads out of order i:%d h:%d pos: %d %d\n",i,h,bamfiles_data[i].read->position,last);
				fprintf(stderr,"the program will now exit, please sort the bamfiles\n");
				// nothing is released on this path; per the message above the caller is expected to exit
				return 1;
			}

			if (INDEL_REALIGNMENT >=1 && bamfiles_data[i].read->position > current_position+offset_readlength)
			{
				// need to clean up indel lists when we encounter a new chromosome...
				print_indel_lists(bamfiles_data,bamfiles,current_position+offset_readlength);
				clean_indel_lists(bamfiles_data,bamfiles,current_position);
				current_position = bamfiles_data[i].read->position;
			}
			// realign reads before calling variants, each read is realigned only once

			// variants are only called when the current read starts more than offset_readlength past 'last';
			// 'last' itself is updated for every accepted read, whether or not callvariants ran
			if (RQ->reads > 0 && bamfiles_data[i].read->position > last+offset_readlength)
			{
				callvariants(reflist,bamfiles_data[i].read->tid,last,bamfiles_data[i].read->position-offset_readlength,RQ,bamfiles_data,options,&variant);
			}
			last = bamfiles_data[i].read->position-offset_readlength;
			if (last < 0) last =0;

			bamfiles_data[i].read->cflag = 0;
			// this function should only be called on reads inside/close_to targeted regions..
			parse_cigar(bamfiles_data[i].read,reflist,bamfiles_data[i].read->tid,fcigarlist);

			if (INDEL_REALIGNMENT >=1 && bamfiles_data[i].read->gaps > 0 && bamfiles_data[i].read->mquality >= 20) extract_indel_reads(bamfiles_data[i].read,reflist,bamfiles_data[i].read->tid,i,bamfiles_data[i].ilist);

			// append the read to the shared queue (linked through ->next)
			if (RQ->last == NULL)
			{
				RQ->last = bamfiles_data[i].read; RQ->first = RQ->last; (RQ->last)->next = NULL;
				RQ->reads++;
			}
			else
			{
				(RQ->last)->next = bamfiles_data[i].read; RQ->last = bamfiles_data[i].read;
				(RQ->last)->next = NULL;
				RQ->reads++;
			}
			// and to the list of reads of file i (linked through ->nextread)
			if (bamfiles_data[i].last ==NULL) bamfiles_data[i].first = RQ->last;
			else bamfiles_data[i].last->nextread= RQ->last;
			bamfiles_data[i].last = RQ->last; (RQ->last)->nextread =NULL;
			// read that passes filters from 'i'th bam file is inserted in queue, should also add it to OPE queue
		}
		else free_read(bamfiles_data[i].read); // filtered reads are discarded immediately

		// fetch the next record of file i
		if (options->regions ==NULL) rf =samread(bamfiles_data[i].fp,bamfiles_data[i].b);
		else rf  = bam_iter_read(bamfiles_data[i].fp->x.bam,bamfiles_data[i].iter,bamfiles_data[i].b);
		if (rf >=0)
		{
			bamfiles_data[i].read = get_read_bamfile(bamfiles_data[i].b,bamfiles_data[i].fp,pread);
			// bug here june 30 2013: the BAM_TO_SAMPLE mapping was commented out, sampleid is the file index
			bamfiles_data[i].read->sampleid = i;
			// the heap is only re-ordered for reads that will be used; a filtered read stays at the root and is dropped on the next iteration
			if (!(bamfiles_data[i].read->flag & BAM_FILTER_MASK)) minHeapify(&bheap,0,bamfiles_data);
		}
		else // no more reads in file 'i'
		{
			bamfiles_data[i].finished = 1; bamfiles_data[i].read= NULL;
			bam_destroy1(bamfiles_data[i].b);
			h++; finishedfiles++;
			// push file i off the heap: move the last entry to the root and reduce heap size
			bheap.harray[0] = bheap.harray[bheap.length-1]; bheap.length--;
			if (bheap.length > 0) minHeapify(&bheap,0,bamfiles_data);
		}
		// 'reads' is unsigned long, so it is printed with %lu
		if ((++reads)%1000000 ==0 && RQ->reads >0) fprintf(stderr,".....processed %lu reads QSIZE:%d %s:%d:%d variants called %d\n",reads,RQ->reads,RQ->first->chrom,RQ->first->position,RQ->first->lastpos,VARIANTS_CALLED);
	}

	if (prev_tid >=0)  // finish the processing of last chromosome
	{
		if (RQ->reads >0)
		{
			fprintf(stderr,"processing %d reads left in queue for chrom %s.....",RQ->reads,reflist->names[prev_tid]);
			if (reflist->lengths[prev_tid] > last) callvariants(reflist,prev_tid,last,reflist->lengths[prev_tid],RQ,bamfiles_data,options,&variant);
			empty_queue(RQ,bamfiles_data); // clean the queue
		}
		else fprintf(stderr,"queue for chrom %s is empty ",reflist->names[prev_tid]);
		free(reflist->sequences[prev_tid]);
		fprintf(stderr,"finished processing reads for chrom %s \n\n",reflist->names[prev_tid]);
		if (INDEL_REALIGNMENT >=1)
		{
			print_indel_lists(bamfiles_data,bamfiles,reflist->lengths[prev_tid]);
			clean_indel_lists(bamfiles_data,bamfiles,reflist->lengths[prev_tid]);
		}
	}
	fprintf(stderr,"CRISP has finished processing bam files: total reads processed %lu total variants called %d \n\n",reads,VARIANTS_CALLED);

	// the per-file bam records were already destroyed when each file ran out of reads
	free(bamfiles_data); free(bheap.harray); free(fcigarlist);
	free(RQ); // queue header allocated at the top of this function
	return 1;
}