/*
 * Advance the one-token lookahead window.
 *
 * The previously current value is released with clear_variant(), the
 * lookahead pair (next_token / next_value) becomes the current pair
 * (token / value), and a fresh lookahead token is pulled from the scanner
 * into a re-initialised next_value.
 */
void read_token()
{
    /* Drop whatever the old current value was holding before overwriting it. */
    clear_variant(&value);

    /* Promote the lookahead to "current". */
    value = next_value;
    token = next_token;

    /* Refill the lookahead slot from the scanner. */
    init_variant(&next_value);
    next_token = scan_token(&next_value);
}
/*
 * Parse an input file into the program structure `p`.
 *
 * file: path of the file to read, or NULL to read from stdin (reported
 *       under the name "<stdin>").
 * p:    destination program; must not be NULL. Its `file` member is set to
 *       a heap copy (strdup) of the name being parsed.
 *
 * Returns -1 if the name cannot be duplicated or the file cannot be opened,
 * the negative value from start_scan() if the scanner cannot be started,
 * and otherwise the file-level `result` as left by read_program().
 */
int parse(const char* file, program_t* p)
{
    int r = 0;
    FILE* fp = NULL;

    /* Check the pointer before it is dereferenced, not after. */
    assert(p != NULL);
    prog = p;

    /* strdup(NULL) is undefined; when reading stdin, record the display name
     * that is also handed to the scanner below. */
    prog->file = strdup(file != NULL ? file : "<stdin>");
    if (prog->file == NULL) {
        fprintf(stderr, "Failed to allocate memory for file name\n");
        return -1;
    }

    if (file != NULL) {
        fp = fopen(file, "r");
        if (fp == NULL) {
            fprintf(stderr, "Failed to open %s\n", file);
            return -1;
        }
    } else {
        fp = stdin;
        file = "<stdin>";
    }

    /* NOTE(review): fp is never closed here; it is not visible from this
     * function whether the scanner takes ownership of the stream - confirm. */
    r = start_scan(fp, file);
    if (r < 0) {
        return r;
    }

    /* Prime the one-token lookahead before handing over to the grammar. */
    init_variant(&value);
    init_variant(&next_value);
    next_token = scan_token(&next_value);

    read_program();
    return result;
}
void read_argument(arg_t* arg) { assert(arg != NULL); int as_value = 0; unsigned int t1 = 0; variant_t v1; unsigned int t2 = 0; init_variant(&v1); read_token(); if (token == LEFT_BRAKET_TOKEN) { as_value = 1; read_token(); } else { as_value = 0; } if (is_value_token(token)) { t1 = token; copy_variant(&v1, &value); } else { fprintf(stderr, "%s:%d: error: Expected identifier or integer but got %s.\n", get_scan_file(), get_scan_line(), get_token_name(token)); result = -1; return; } if (next_token == PLUS_TOKEN) { read_token(); assert(token == PLUS_TOKEN); read_token(); if (is_value_token(token)) { t2 = token; } else { fprintf(stderr, "%s:%d: error: Expected identifier or integer but got %s.\n", get_scan_file(), get_scan_line(), get_token_name(token)); result = -1; return; } } if (as_value == 1) { read_token(); if (token != RIGHT_BRAKET_TOKEN) { fprintf(stderr, "%s:%d: error: Missmatched braket.\n", get_scan_file(), get_scan_line()); result = -1; return; } } /* 0x00-0x07: register (A, B, C, X, Y, Z, I or J, in that order) 0x08-0x0f: [register] 0x10-0x17: [next word + register] 0x18: POP / [SP++] 0x19: PEEK / [SP] 0x1a: PUSH / [--SP] 0x1b: SP 0x1c: PC 0x1d: O 0x1e: [next word] 0x1f: next word (literal) 0x20-0x3f: literal value 0x00-0x1f (literal) */ if (is_symbolic(t1)) { switch (t1) { case A_TOKEN: if (as_value) arg->value = 0x08; else arg->value = 0x00; break; case B_TOKEN: if (as_value) arg->value = 0x09; else arg->value = 0x01; break; case C_TOKEN: if (as_value) arg->value = 0x0A; else arg->value = 0x02; break; case X_TOKEN: if (as_value) arg->value = 0x0B; else arg->value = 0x03; break; case Y_TOKEN: if (as_value) arg->value = 0x0C; else arg->value = 0x04; break; case Z_TOKEN: if (as_value) arg->value = 0x0D; else arg->value = 0x05; break; case I_TOKEN: if (as_value) arg->value = 0x0E; else arg->value = 0x06; break; case J_TOKEN: if (as_value) arg->value = 0x0F; else arg->value = 0x07; break; case POP_TOKEN: arg->value = 0x18; break; case PEEK_TOKEN: arg->value = 
0x19; break; case PUSH_TOKEN: arg->value = 0x1A; break; case SP_TOKEN: arg->value = 0x1B; break; case PC_TOKEN: arg->value = 0x1C; break; case O_TOKEN: arg->value = 0x1D; break; } /* TODO validate that there is no offset */ } else if (t1 == IDENTIFIER_TOKEN) { // for jumps we encode alwas next word // this is since we can't resolve the target at this point arg->value = 0x1F; copy_variant(&arg->extra, &v1); /* TODO validate that there is no offset */ } else { assert(t1 == INTEGER_TOKEN); assert(v1.type == UINT_TYPE); if (as_value && t2 == 0) { arg->value = 0x1E; copy_variant(&arg->extra, &v1); } if (!as_value && t2 == 0) { if (v1.ui <= 0x1F) { arg->value = 0x20 + v1.ui; } else { arg->value = 0x1F; copy_variant(&arg->extra, &v1); } } if (t2 != 0) { assert(as_value == 1); switch (t2) { case A_TOKEN: arg->value = 0x10; break; case B_TOKEN: arg->value = 0x11; break; case C_TOKEN: arg->value = 0x12; break; case X_TOKEN: arg->value = 0x13; break; case Y_TOKEN: arg->value = 0x14; break; case Z_TOKEN: arg->value = 0x15; break; case I_TOKEN: arg->value = 0x16; break; case J_TOKEN: arg->value = 0x17; break; default: assert(0 && "should never happen"); break; } copy_variant(&arg->extra, &v1); } } }
// multi sample variant caller: CRISP, PICALL or low coverage method int multisampleVC(struct OPTIONS* options,REFLIST* reflist,FILE* fp) { if (USE_DUPLICATES ==1) BAM_FILTER_MASK = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL); else BAM_FILTER_MASK = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP); int bamfiles = options->bamfiles; int last=0; // last is the current position s.t. all reads have starting position > last int i=0; int h=0; unsigned long reads=0; int j=0; int prev_tid = -1; int rf=0; int finishedfiles =0; struct alignedread* pread = NULL; struct BAMFILE_data* bamfiles_data = calloc(bamfiles,sizeof(struct BAMFILE_data)); // added one extra to list to store indels for all samples combined READQUEUE* RQ = (READQUEUE*)malloc(sizeof(READQUEUE)); RQ->first = NULL; RQ->last = NULL; RQ->reads = 0; int* fcigarlist = (int*)malloc(sizeof(int)*4096); // data structure for holding potential variants and read counts, etc struct VARIANT variant; variant.ploidy = calloc(options->bamfiles,sizeof(int)); init_poolsizes(&variant,options,PICALL); init_variant(&variant,options->bamfiles,options->bamfiles); variant.options = options; // pointer to options BAMHEAP bheap; bheap.harray = (int*)malloc(sizeof(int)*bamfiles); bheap.length = bamfiles; for (i=0;i<bamfiles;i++) { bheap.harray[i] = i; bamfiles_data[i].finished= 0;} reflist->cinterval = -1; // first interval to the right of current base init_bamfiles(bamfiles_data,options->bamfilelist,bamfiles,options->regions,&options->targettid,&options->targetstart,&options->targetend); // error when reading indexed bam files probably due to lack of reads in some files resulting in heap error, fixed oct 17 2012 j=0; for (i=0;i<bamfiles;i++) { finishedfiles += bamfiles_data[i].finished; if (bamfiles_data[i].finished ==0) bheap.harray[j++] = i; else bheap.length--; } buildminheap(&bheap,bamfiles_data); // initial minheap call //fprintf(stderr,"finishedfiles %d \n",finishedfiles); if (INDEL_REALIGNMENT >=1) 
allocate_mem_heap(bamfiles_data,bamfiles,100); HAPLOTYPES =0,MIN_COVERAGE_FLANKING =0; for (i=0;i<variant.samples;i++) { MIN_COVERAGE_FLANKING += 2*variant.ploidy[i]; // enforced for regions outside the bedfile target HAPLOTYPES += variant.ploidy[i]; } //int min_coverage_target = 1*variant->ploidy*variant->samples; // enforced for regions outside the bedfile target int offset_readlength = 150; // call variants in window (last,current_read_position-offset_readlength) to allow for indel analysis, set to 0 for original behavior of program // the value of offset should not affect the correctness or speed of the code int current_position =0; while (finishedfiles < bamfiles) { i = bheap.harray[0]; // take the top read off the heap if ( !(bamfiles_data[i].read->flag & BAM_FILTER_MASK)) { if (bamfiles_data[i].read->tid != prev_tid) // read's chromosome is different from previousread { if (prev_tid >=0) // finish the processing of previous chromosome and cleanup { if (RQ->reads >0) { fprintf(stderr,"processing %d reads left in queue for chrom %s...",RQ->reads,reflist->names[prev_tid]); callvariants(reflist,prev_tid,last,reflist->lengths[prev_tid],RQ,bamfiles_data,options,&variant); empty_queue(RQ,bamfiles_data); //clean thequeue } if (INDEL_REALIGNMENT >=1) clean_indel_lists(bamfiles_data,bamfiles,-1); current_position = 0; for(j=0;j<bamfiles;j++) bamfiles_data[j].last=NULL; last =0; free(reflist->sequences[prev_tid]); fprintf(stderr,".....finished processing reads for chrom %s\n",reflist->names[prev_tid]); fprintf(stdout,".....finished processing reads for chrom %s\n",reflist->names[prev_tid]); reflist->cinterval = -1; // reset to -1 } read_chromosome(reflist,bamfiles_data[i].read->tid,fp); prev_tid =bamfiles_data[i].read->tid; } if (bamfiles_data[i].read->position <last) { fprintf(stderr,"reads out of order i:%d h:%d pos: %d %d\n",i,h,bamfiles_data[i].read->position,last); fprintf(stderr,"the program will now exit, please sort the bamfiles\n"); return 1; } if 
(INDEL_REALIGNMENT >=1 && bamfiles_data[i].read->position > current_position+offset_readlength) { // need to clean up indel lists when we encounter a new chromosome... print_indel_lists(bamfiles_data,bamfiles,current_position+offset_readlength); clean_indel_lists(bamfiles_data,bamfiles,current_position); current_position = bamfiles_data[i].read->position; } // realign reads before calling variants, each read is realigned only once // small bug here, only call variants when last is less than current read position // bug fixed here, update last only when 'callvariants' is invoked, ??? if (RQ->reads > 0 && bamfiles_data[i].read->position > last+offset_readlength) { callvariants(reflist,bamfiles_data[i].read->tid,last,bamfiles_data[i].read->position-offset_readlength,RQ,bamfiles_data,options,&variant); } last = bamfiles_data[i].read->position-offset_readlength; if (last < 0) last =0; bamfiles_data[i].read->cflag = 0; // this function should only be called on reads inside/close_to targeted regions.. 
parse_cigar(bamfiles_data[i].read,reflist,bamfiles_data[i].read->tid,fcigarlist); if (INDEL_REALIGNMENT >=1 && bamfiles_data[i].read->gaps > 0 && bamfiles_data[i].read->mquality >= 20) extract_indel_reads(bamfiles_data[i].read,reflist,bamfiles_data[i].read->tid,i,bamfiles_data[i].ilist); //fprintf(stdout,"read s:%d IS:%d %s %d \n",i,bamfiles_data[i].read->IS,bamfiles_data[i].read->readid,bamfiles_data[i].read->position); if (RQ->last == NULL) { RQ->last = bamfiles_data[i].read; RQ->first = RQ->last; (RQ->last)->next = NULL; RQ->reads++; } else { (RQ->last)->next = bamfiles_data[i].read; RQ->last = bamfiles_data[i].read; (RQ->last)->next = NULL; RQ->reads++; } if (bamfiles_data[i].last ==NULL) bamfiles_data[i].first = RQ->last; else bamfiles_data[i].last->nextread= RQ->last; bamfiles_data[i].last = RQ->last; (RQ->last)->nextread =NULL; // read that passes filters from 'i'th bam file is inserted in queue, should also add it to OPE queue //if (bamfiles_data[i].read->position < bamfiles_data[i].read->mateposition && bamfiles_data[i].read->lastpos > bamfiles_data[i].read->mateposition) //fprintf(stdout,"B %d %s %d %d %d \n",i,bamfiles_data[i].read->readid,bamfiles_data[i].read->position,bamfiles_data[i].read->mateposition,bamfiles_data[i].read->IS); } else free_read(bamfiles_data[i].read); //fprintf(stdout,"read from %d %d %s\n",i,bamfiles_data[i].read->position,bamfiles_data[i].read->readid); if (options->regions ==NULL) rf =samread(bamfiles_data[i].fp,bamfiles_data[i].b); else rf = bam_iter_read(bamfiles_data[i].fp->x.bam,bamfiles_data[i].iter,bamfiles_data[i].b); if (rf >=0) { bamfiles_data[i].read = get_read_bamfile(bamfiles_data[i].b,bamfiles_data[i].fp,pread); //if (options->samples ==0) bamfiles_data[i].read->sampleid = i; //else bamfiles_data[i].read->sampleid = options->BAM_TO_SAMPLE[i]; // bug here june 30 2013 commented out .... 
in 12 T2D pools bamfiles_data[i].read->sampleid = i; if (!(bamfiles_data[i].read->flag & BAM_FILTER_MASK)) minHeapify(&bheap,0,bamfiles_data); } else // no more reads in file 'i' { bamfiles_data[i].finished = 1; bamfiles_data[i].read= NULL; bam_destroy1(bamfiles_data[i].b); h++; finishedfiles++; //fprintf(stderr,"finished reading bam file %s \n",options->bamfilelist[i]); //return 1; bheap.harray[0] = bheap.harray[bheap.length-1]; bheap.length--; if (bheap.length > 0) minHeapify(&bheap,0,bamfiles_data); // call minheapify like function to push sample i off the heap, reduce heap size } if ((++reads)%1000000 ==0 && RQ->reads >0) fprintf(stderr,".....processed %ld reads QSIZE:%d %s:%d:%d variants called %d\n",reads,RQ->reads,RQ->first->chrom,RQ->first->position,RQ->first->lastpos,VARIANTS_CALLED); } if (prev_tid >=0) // finish the processing of last chromosome { if (RQ->reads >0) { fprintf(stderr,"processing %d reads left in queue for chrom %s.....",RQ->reads,reflist->names[prev_tid]); if (reflist->lengths[prev_tid] > last) callvariants(reflist,prev_tid,last,reflist->lengths[prev_tid],RQ,bamfiles_data,options,&variant); empty_queue(RQ,bamfiles_data); //clean thequeue } else fprintf(stderr,"queue for chrom %s is empty ",reflist->names[prev_tid]); free(reflist->sequences[prev_tid]); fprintf(stderr,"finished processing reads for chrom %s \n\n",reflist->names[prev_tid]); if (INDEL_REALIGNMENT >=1) { print_indel_lists(bamfiles_data,bamfiles,reflist->lengths[prev_tid]); clean_indel_lists(bamfiles_data,bamfiles,reflist->lengths[prev_tid]); } } fprintf(stderr,"CRISP has finished processing bam files: total reads processed %ld total variants called %d \n\n",reads,VARIANTS_CALLED); //for (i=0;i<bamfiles;i++) bam_destroy1(bamfiles_data[i].b); free(bamfiles_data); free(bheap.harray); free(fcigarlist); //empty_queue(RQ); //clean thequeue //fprintf(stdout,"FILE %d %s %d %s %d %d %d mapped %d \n",i,read->readid,read->flag,read->chrom,read->position,read->mquality,read->IS,(read->flag 
&4)); return 1; }