/* Translates an offset within a named chromosome into universal genomic
 * coordinates.
 *
 *   genomicstart   (out) low coordinate of the chromosome's interval plus left
 *   genomiclength  (out) length, or, when length is 0, the interval length
 *                  minus left
 *   chromosome     label looked up in chromosome_iit via IIT_find_linear
 *   left           offset added to the interval's low coordinate
 *   length         requested length; 0 means "through the end of the interval"
 *
 * Returns 0 on success and 1 if the chromosome label is not found (the
 * outputs are then left untouched).  Exits with status 9 if the start
 * coordinate wraps around an unsigned int.
 */
static int
translate_chromosomepos_universal (unsigned int *genomicstart, unsigned int *genomiclength,
                                   char *chromosome, unsigned int left, unsigned int length,
                                   IIT_T chromosome_iit) {
  Interval_T chrom_interval;
  int chrom_index;
#ifdef DEBUG
  bool allocp;
#endif

  chrom_index = IIT_find_linear(chromosome_iit,chromosome);
  if (chrom_index < 0) {
    /* Unknown chromosome */
    return 1;
  }

  debug(printf("chromosome %s => index %d\n",chromosome,chrom_index));
  chrom_interval = IIT_interval(chromosome_iit,chrom_index);
  debug(printf(" => label %s with interval low %u\n",
               IIT_label(chromosome_iit,chrom_index,&allocp),Interval_low(chrom_interval)));

  *genomicstart = Interval_low(chrom_interval)+left;
  if (*genomicstart < Interval_low(chrom_interval)) {
    /* Unsigned addition wrapped around */
    fprintf(stderr,"%u + %u = %u (exceeds a 32-bit unsigned int)\n",
            Interval_low(chrom_interval),left,*genomicstart);
    exit(9);
  }

  *genomiclength = (length == 0) ? Interval_length(chrom_interval)-left : length;
  return 0;
}
static void find_positions (bool *revcompp, Genomicpos_T *leftposition, Genomicpos_T *rightposition, Genomicpos_T *startposition, Genomicpos_T *endposition, Genomicpos_T *truelength, int *contigtype, char *accession, IIT_T contig_iit) { int index; Interval_T interval; char firstchar; if ((index = IIT_find_one(contig_iit,accession)) == -1) { fprintf(stderr,"Can't find accession %s in contig IIT file\n", accession); exit(9); } else { interval = IIT_interval(contig_iit,index); *leftposition = Interval_low(interval); *rightposition = Interval_high(interval); if (IIT_version(contig_iit) <= 1) { firstchar = IIT_annotation_firstchar(contig_iit,index); if (firstchar == '-') { *revcompp = true; *startposition = Interval_high(interval) + 1U; *endposition = Interval_low(interval) + 1U; } else { *revcompp = false; *startposition = Interval_low(interval); *endposition = Interval_high(interval); } } else { if (Interval_sign(interval) < 0) { *revcompp = true; *startposition = Interval_high(interval) + 1U; *endposition = Interval_low(interval) + 1U; } else { *revcompp = false; *startposition = Interval_low(interval); *endposition = Interval_high(interval); } } *truelength = Interval_length(interval); *contigtype = Interval_type(interval); debug(printf("revcompp = %d, leftposition = %d, rightposition = %d, startposition = %d, endposition = %d\n", *revcompp,*leftposition,*rightposition,*startposition,*endposition)); return; } }
/* Accumulates tally statistics for the part of interval indexi that lies
 * within coordstart..coordend.
 *
 * The annotation is walked one line per coordinate, starting at the
 * interval's low coordinate: lines before coordstart are skipped, and for
 * each remaining coordinate up to min(interval high, coordend) the count
 * reported by get_total_tally is added to *total, *n is incremented, and
 * log(count + 1) is added to the returned sum.
 *
 * If the annotation runs out of lines while skipping to coordstart, a
 * message is written to stderr and the sum so far (0.0) is returned.
 *
 * The header buffer returned by IIT_annotation is released on every exit
 * path when it was allocated.
 */
static double
compute_logtotal_tally (long int *total, int *n, Chrpos_T coordstart, Chrpos_T coordend, int indexi, IIT_T iit) {
  double logtotal = 0.0;
  Interval_T interval;
  char *annotation, *restofheader, *ptr;
  bool allocp;
  Chrpos_T chrpos, intervalend;
  long int count;

  annotation = IIT_annotation(&restofheader,iit,indexi,&allocp);

  interval = IIT_interval(iit,indexi);
  chrpos = Interval_low(interval);
  intervalend = Interval_high(interval);

  /* Skip the lines for coordinates before the requested start */
  ptr = annotation;
  while (chrpos < coordstart) {
    if ((ptr = index(ptr,'\n')) == NULL) {
      fprintf(stderr,"Premature end of tally from %u to %u\n",
              Interval_low(interval),Interval_high(interval));
      goto cleanup;		/* Previously returned here and leaked restofheader */
    }
    ptr++;
    chrpos++;
  }

  while (chrpos <= intervalend && chrpos <= coordend) {
    count = 0;
    ptr = get_total_tally(&count,ptr);
    logtotal += log((double) count + 1.0);
    *total += count;
    *n += 1;
    ptr++;			/* Step past the character at which get_total_tally stopped */
    chrpos++;
  }

 cleanup:
  if (allocp == true) {
    FREE(restofheader);
  }

  return logtotal;
}
static int process_snp_block (int *nwarnings, Positionsptr_T *offsets, Genomicpos_T *positions, Interval_T *intervals, int nintervals, Genomicpos_T chroffset, Genome_T genome, UINT4 *snp_blocks, int divno, char *divstring, int intervali, IIT_T snps_iit, IIT_T chromosome_iit, int index1part) { int nerrors = 0; bool *snpp; char *refstring; char *altstring; #ifdef DEBUG1 char *nt; #endif int index, length; int nsnps, stringi, starti, shift, i, k; char *snptype, *label, refnt, altnt; unsigned int ptr; Genomicpos_T snpposition, startposition, endposition, first_snppos, last_snppos, position, chrpos; Chrnum_T chrnum; Interval_T interval; int nunknowns; bool badcharp, allocp; Uintlist_T oligomers, newoligomers, p; Storedoligomer_T oligo; #ifdef WORDS_BIGENDIAN UINT4 high, low, flags; #endif /* Subtract 1 because snps_iit is 1-based */ first_snppos = Interval_low(intervals[0]) - 1U; last_snppos = Interval_low(intervals[nintervals-1]) - 1U; debug1( if (nintervals == 1) { printf("Processing snp at chrpos %s:%u\n",divstring,first_snppos+1U); } else { printf("Processing block of %d snps from chrpos %s:%u to %u\n", nintervals,divstring,first_snppos+1U,last_snppos+1U); } );
/* Translates an offset within a named contig into universal genomic
 * coordinates.
 *
 *   genomicstart   (out) low coordinate of the contig's interval plus left
 *   genomiclength  (out) length, or, when length is 0, the interval length
 *                  minus left
 *   contig         label looked up in contig_iit via IIT_find_one
 *   left           offset added to the interval's low coordinate
 *   length         requested length; 0 means "through the end of the interval"
 *
 * Returns 0 on success and 1 if the contig label is not found (the outputs
 * are then left untouched).  Exits with status 9 if the start coordinate
 * wraps around an unsigned int, matching the check performed by
 * translate_chromosomepos_universal.
 */
static int
translate_contig (unsigned int *genomicstart, unsigned int *genomiclength,
                  char *contig, unsigned int left, unsigned int length, IIT_T contig_iit) {
  int rc = 1, index;
  Interval_T interval;

  if ((index = IIT_find_one(contig_iit,contig)) >= 0) {
    interval = IIT_interval(contig_iit,index);
    *genomicstart = Interval_low(interval)+left;
    if (*genomicstart < Interval_low(interval)) {
      /* Unsigned addition wrapped around; a silent wrap would yield a bogus position */
      fprintf(stderr,"%u + %u = %u (exceeds a 32-bit unsigned int)\n",
              Interval_low(interval),left,*genomicstart);
      exit(9);
    }

    if (length == 0) {
      *genomiclength = Interval_length(interval)-left;
    } else {
      *genomiclength = length;
    }
    rc = 0;
  }

  return rc;
}
/* Prints one "div<TAB>coordinate<TAB>value" line to stdout for every
 * coordinate of interval indexi that lies within coordstart..coordend.
 * The value is the interval's label parsed with atoi.
 *
 * When zeroesp is true, coordinates from *lastcoord up to (but not
 * including) the interval's low coordinate are first printed with value 0.
 * On return, *lastcoord is one past the last coordinate visited.
 */
static void
print_interval_runlength (Chrpos_T *lastcoord, char *divstring, Chrpos_T coordstart, Chrpos_T coordend, int indexi,
                          IIT_T iit, bool zeroesp) {
  Interval_T interval;
  char *label;
  bool allocp;
  Chrpos_T chrpos, intervalend;
  int value;

  label = IIT_label(iit,indexi,&allocp);
  value = atoi(label);

  interval = IIT_interval(iit,indexi);
  chrpos = Interval_low(interval);
  intervalend = Interval_high(interval);

  if (zeroesp == true) {
    /* Fill the gap since the previously printed coordinate */
    while (*lastcoord < chrpos) {
      printf("%s\t%u\t%d\n",divstring,*lastcoord,0);
      (*lastcoord)++;
    }
  }

  /* Jump directly to the requested start instead of stepping one coordinate at a time */
  if (chrpos < coordstart) {
    chrpos = coordstart;
  }

  while (chrpos <= intervalend && chrpos <= coordend) {
    printf("%s\t%u\t%d\n",divstring,chrpos,value);
    chrpos++;
  }

  *lastcoord = chrpos;

  if (allocp == true) {
    FREE(label);
  }

  return;
}
/* Prints one interval of an IIT to stdout, in the mode selected by the
 * file-level flags:
 *
 *   centerp     delegate to print_interval_centered and return 0
 *   tallyp      delegate to print_interval_tally and return total plus its result
 *   runlengthp  delegate to print_interval_runlength and return 0
 *   otherwise   print a ">label [div:]low..high [type]" header (unless
 *               annotationonlyp is set), the rest of the header line, and
 *               then either the whole annotation (fieldint < 0) or the value
 *               of field fieldint; return 0
 *
 * lastcoord, coordstart and coordend are consulted only by the delegated
 * modes.  divstring may be NULL, in which case it is looked up from the
 * index when ndivs > 1.
 */
static long int
print_interval (Chrpos_T *lastcoord, long int total, char *divstring, Chrpos_T coordstart, Chrpos_T coordend,
                int index, IIT_T iit, int ndivs, int fieldint) {
  Interval_T ivl;
  char *name, *body, *header_rest;
  bool must_free;

  /* Specialized output modes */
  if (centerp == true) {
    print_interval_centered(divstring,coordstart,index,iit,fieldint);
    return 0;
  }
  if (tallyp == true) {
    return total + print_interval_tally(lastcoord,divstring,coordstart,coordend,index,iit,zeroesp);
  }
  if (runlengthp == true) {
    print_interval_runlength(lastcoord,divstring,coordstart,coordend,index,iit,zeroesp);
    return 0;
  }

  /* Default mode: header line */
  if (annotationonlyp == false) {
    name = IIT_label(iit,index,&must_free);
    printf(">%s ",name);
    if (must_free == true) {
      FREE(name);
    }

    if (ndivs > 1) {
      if (divstring == NULL) {
        /* For example, if interval was retrieved by label */
        divstring = IIT_divstring_from_index(iit,index);
      }
      printf("%s:",divstring);
    }

    debug(printf("index is %d\n",index));
    ivl = IIT_interval(iit,index);

    /* Coordinates are reversed only for minus-sign intervals in signed mode */
    if (signedp != false && Interval_sign(ivl) < 0) {
      printf("%u..%u",Interval_high(ivl),Interval_low(ivl));
    } else {
      printf("%u..%u",Interval_low(ivl),Interval_high(ivl));
    }

    if (Interval_type(ivl) > 0) {
      printf(" %s",IIT_typestring(iit,Interval_type(ivl)));
    }
    /* No newline here: the "\n" printed after the rest of the header below ends this line */
  }

  /* Rest of header, then the annotation or the requested field */
  body = IIT_annotation(&header_rest,iit,index,&must_free);
  printf("%s\n",header_rest);
  if (fieldint < 0) {
    printf("%s",body);
  }
  if (must_free == true) {
    FREE(header_rest);
  }
  if (fieldint >= 0) {
    body = IIT_fieldvalue(iit,index,fieldint);
    printf("%s\n",body);
    FREE(body);
  }

  return 0;
}
/* Prints per-coordinate tallies for the part of interval indexi that lies
 * within coordstart..coordend, and returns the sum of the counts.
 *
 * The annotation is walked one line per coordinate, starting at the
 * interval's low coordinate.  For each coordinate in range, the count
 * reported by get_total_tally is added to the return value; unless statsp
 * is set, a line "div<TAB>coordinate<TAB>count<TAB>" followed by the tally
 * line is printed when the count is positive or zeroesp is true.
 *
 * When zeroesp is true, coordinates from *lastcoord up to (but not
 * including) the interval's low coordinate are first reported with count 0.
 * On normal completion *lastcoord is set to one past the last coordinate
 * visited.  If the annotation runs out of lines, a message is written to
 * stderr and the sum so far is returned without updating *lastcoord further.
 *
 * The header buffer returned by IIT_annotation is released on every exit
 * path when it was allocated.
 */
static long int
print_interval_tally (Chrpos_T *lastcoord, char *divstring, Chrpos_T coordstart, Chrpos_T coordend, int indexi,
                      IIT_T iit, bool zeroesp) {
  long int total = 0, subtotal;
  Interval_T interval;
  char *annotation, *restofheader, *ptr, *nextptr;
  bool allocp;
  Chrpos_T chrpos, intervalend;

  annotation = IIT_annotation(&restofheader,iit,indexi,&allocp);

  interval = IIT_interval(iit,indexi);
  chrpos = Interval_low(interval);
  intervalend = Interval_high(interval);

  ptr = annotation;

  if (zeroesp == true) {
    /* Fill the gap since the previously printed coordinate */
    while (*lastcoord < chrpos) {
      if (statsp == false) {
        printf("%s\t%u\t%d\t\n",divstring,*lastcoord,0);
      }
      (*lastcoord)++;
    }
  }

  /* Skip the lines for coordinates before the requested start */
  while (chrpos < coordstart) {
    if ((ptr = index(ptr,'\n')) == NULL) {
      fprintf(stderr,"Premature end of tally from %u to %u\n",
              Interval_low(interval),Interval_high(interval));
      goto cleanup;		/* Previously returned here and leaked restofheader */
    }
    ptr++;
    chrpos++;
  }

  while (chrpos <= intervalend && chrpos <= coordend) {
    subtotal = 0;
    nextptr = get_total_tally(&subtotal,ptr);
    if (subtotal > 0 || zeroesp == true) {
      if (statsp == false) {
        /* Report this coordinate's own count, not the running total */
        printf("%s\t%u\t%ld\t",divstring,chrpos,subtotal);
        print_line(ptr);
        printf("\n");
      }
    }
    total += subtotal;

    /* Advance to the start of the next line */
    if ((ptr = index(nextptr,'\n')) == NULL) {
      fprintf(stderr,"Premature end of tally from %u to %u\n",
              Interval_low(interval),Interval_high(interval));
      goto cleanup;		/* Previously returned here and leaked restofheader */
    }
    ptr++;
    chrpos++;
  }

  *lastcoord = chrpos;

 cleanup:
  if (allocp == true) {
    FREE(restofheader);
  }

  return total;
}
/* Prints the annotation (fieldint < 0) or one field value (fieldint >= 0)
 * of interval index on a single line, padded with print_spaces using the
 * file-level centerlength and the offset of coordstart within the interval,
 * and with the character at coordstart shown in square brackets.
 *
 * Minus-sign intervals are printed as the complement, reading from the end
 * of the text.  After the text come a tab, the interval type (if > 0), the
 * signed div and coordinates, a tab, and the label.
 *
 * Nothing is printed when centeruc is set and the center character is
 * lower case.  If coordstart does not fall inside the text (including the
 * case of an empty text), a message is written to stderr and nothing is
 * printed.
 */
static void
print_interval_centered (char *divstring, Chrpos_T coordstart, int index, IIT_T iit, int fieldint) {
  Interval_T interval;
  char *label, *annotation, *restofheader, centerchar;
  bool allocp, free_annotation_p;
  int annotlength, left, centerpos;

  if (fieldint < 0) {
    annotation = IIT_annotation(&restofheader,iit,index,&allocp);
    if (allocp == true) {
      FREE(restofheader);
    }
    free_annotation_p = false;
  } else {
    /* Field values are freed by the caller of IIT_fieldvalue, as elsewhere in this file */
    annotation = IIT_fieldvalue(iit,index,fieldint);
    free_annotation_p = true;
  }

  /* Ignore one trailing newline; guard against an empty string */
  annotlength = strlen(annotation);
  if (annotlength > 0 && annotation[annotlength-1] == '\n') {
    annotlength--;
  }

  interval = IIT_interval(iit,index);
  left = coordstart - Interval_low(interval); /* + length(query) - queryend */
  if (Interval_sign(interval) < 0) {
    centerpos = annotlength-left-1;
  } else {
    centerpos = left;
  }

  /* Refuse to index outside the text */
  if (centerpos < 0 || centerpos >= annotlength) {
    fprintf(stderr,"Center position %d is outside annotation of length %d for interval %u..%u\n",
            centerpos,annotlength,Interval_low(interval),Interval_high(interval));
    if (free_annotation_p == true) {
      FREE(annotation);
    }
    return;
  }

  centerchar = annotation[centerpos];
  /* <ctype.h> functions require a value representable as unsigned char */
  if (centeruc == true && islower((unsigned char) centerchar)) {
    /* Skip entries whose center character is lower case */
    if (free_annotation_p == true) {
      FREE(annotation);
    }
    return;
  }

  print_spaces(centerlength-left);
  if (Interval_sign(interval) < 0) {
    print_complement(annotation,annotlength-1,centerpos+1);
    printf("[%c]",complCode[(int) centerchar]);
    print_complement(annotation,centerpos-1,0);
  } else {
    print_forward(annotation,0,centerpos-1);
    printf("[%c]",centerchar);
    print_forward(annotation,centerpos+1,annotlength-1);
  }
  print_spaces(centerlength+left-annotlength);
  if (free_annotation_p == true) {
    FREE(annotation);
  }

  printf("\t");
  if (Interval_type(interval) > 0) {
    printf("%s\t",IIT_typestring(iit,Interval_type(interval)));
  }

  if (divstring != NULL) {
    if (Interval_sign(interval) < 0) {
      printf("-%s:",divstring);
    } else {
      printf("+%s:",divstring);
    }
  }

  if (signedp == false) {
    printf("%u..%u",Interval_low(interval),Interval_high(interval));
  } else if (Interval_sign(interval) < 0) {
    printf("%u..%u",Interval_high(interval),Interval_low(interval));
  } else {
    printf("%u..%u",Interval_low(interval),Interval_high(interval));
  }

  printf("\t");
  label = IIT_label(iit,index,&allocp);
  printf("%s",label);
  if (allocp == true) {
    FREE(label);
  }

  printf("\n");

  return;
}
bool Parserange_universal_iit (char **div, bool *revcomp, Genomicpos_T *genomicstart, Genomicpos_T *genomiclength, Genomicpos_T *chrstart, Genomicpos_T *chrend, Genomicpos_T *chroffset, Genomicpos_T *chrlength, char *query, IIT_T chromosome_iit, IIT_T contig_iit) { char *coords; Genomicpos_T result, left, length; Interval_T interval; int theindex; int rc; *revcomp = false; if (index(query,':')) { /* Segment must be a genome, chromosome, or contig */ debug(printf("Parsed query %s into ",query)); *div = strtok(query,":"); if ((*div)[0] == '+') { *revcomp = false; *div = &((*div)[1]); } else if ((*div)[0] == '_') { *revcomp = true; *div = &((*div)[1]); } coords = strtok(NULL,":"); debug(printf("segment %s and coords %s\n",*div,coords)); debug(printf("Interpreting segment %s as a chromosome\n",*div)); if (coords == NULL) { debug(printf(" entire chromosome\n")); rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left=0,length=0,chromosome_iit); } else if (isnumberp(&result,coords)) { debug(printf(" and coords %s as a number\n",coords)); rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left=result-1,length=1,chromosome_iit); } else if (isrange(&left,&length,&(*revcomp),coords)) { debug(printf(" and coords %s as a range starting at %u with length %u and revcomp = %d\n", coords,left,length,*revcomp)); rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left,length,chromosome_iit); } else { debug(printf(" but coords %s is neither a number nor a range\n",coords)); rc = -1; } /* Compute chromosomal coordinates */ *chrstart = left; *chrend = *chrstart + *genomiclength; *chrstart += 1U; /* Make 1-based */ /* Get chromosomal information */ if ((theindex = IIT_find_one(chromosome_iit,*div)) < 0) { fprintf(stderr,"Cannot find chromosome %s in chromosome IIT file\n",*div); /* exit(9); */ } else { interval = IIT_interval(chromosome_iit,theindex); *chroffset = Interval_low(interval); *chrlength = 
Interval_length(interval); } if (rc != 0) { /* Try contig */ debug(printf("Interpreting segment %s as a contig\n",*div)); if (isnumberp(&result,coords)) { debug(printf(" and coords %s as a number\n",coords)); rc = translate_contig(&(*genomicstart),&(*genomiclength),*div,left=result-1,length=1,contig_iit); } else if (isrange(&left,&length,&(*revcomp),coords)) { debug(printf(" and coords %s as a range starting at %u with length %u and revcomp = %d\n", coords,left,length,*revcomp)); rc = translate_contig(&(*genomicstart),&(*genomiclength),*div,left,length,contig_iit); } else { debug(printf(" but coords %s is neither a number nor a range\n",coords)); rc = -1; } } if (rc != 0) { fprintf(stderr,"Can't find coordinates %s:%s\n",*div,coords); return false; } else { return true; } } else { /* Query must be a genomic position, genomic range, or contig */ *chrstart = *chroffset = *chrlength = 0; debug(printf("Parsed query %s as atomic ",query)); if (isnumberp(&result,query)) { debug(printf("number\n")); *genomicstart = result-1; *genomiclength = 1; } else if (isrange(&left,&length,&(*revcomp),query)) { debug(printf("range\n")); *genomicstart = left; *genomiclength = length; } else { debug(printf("contig\n")); return false; #if 0 rc = translate_contig(&(*genomicstart),&(*genomiclength),query,left=0,length=0,contig_iit); IIT_free(&contig_iit); #endif } *div = convert_to_chrpos_iit(&(*chrstart),chromosome_iit,*genomicstart); *chrend = *chrstart + *genomiclength; *chrstart += 1U; /* Make 1-based */ /* Try chromosome first */ if ((theindex = IIT_find_one(chromosome_iit,*div)) < 0) { fprintf(stderr,"Cannot find chromosome %s in chromosome IIT file\n",*div); return false; } else { interval = IIT_interval(chromosome_iit,theindex); *chroffset = Interval_low(interval); *chrlength = Interval_length(interval); return true; } } }
bool Parserange_universal (char **div, bool *revcomp, Genomicpos_T *genomicstart, Genomicpos_T *genomiclength, Genomicpos_T *chrstart, Genomicpos_T *chrend, Genomicpos_T *chroffset, Genomicpos_T *chrlength, char *query, char *genomesubdir, char *fileroot) { char *coords, *filename; Genomicpos_T result, left, length; IIT_T chromosome_iit, contig_iit; Interval_T interval; int theindex; int rc; *revcomp = false; if (index(query,':')) { /* Segment must be a genome, chromosome, or contig */ debug(printf("Parsed query %s into ",query)); *div = strtok(query,":"); if ((*div)[0] == '+') { *revcomp = false; *div = &((*div)[1]); } else if ((*div)[0] == '_') { *revcomp = true; *div = &((*div)[1]); } coords = strtok(NULL,":"); debug(printf("segment %s and coords %s\n",*div,coords)); /* Try chromosome first */ filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+ strlen(".chromosome.iit")+1,sizeof(char)); sprintf(filename,"%s/%s.chromosome.iit",genomesubdir,fileroot); chromosome_iit = IIT_read(filename,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL, /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true); FREE(filename); debug(printf("Interpreting segment %s as a chromosome\n",*div)); if (coords == NULL) { debug(printf(" entire chromosome\n")); rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left=0,length=0,chromosome_iit); } else if (isnumberp(&result,coords)) { debug(printf(" and coords %s as a number\n",coords)); rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left=result-1,length=1,chromosome_iit); } else if (isrange(&left,&length,&(*revcomp),coords)) { debug(printf(" and coords %s as a range starting at %u with length %u and revcomp = %d\n", coords,left,length,*revcomp)); rc = translate_chromosomepos_universal(&(*genomicstart),&(*genomiclength),*div,left,length,chromosome_iit); } else { debug(printf(" but coords %s is neither a number nor a range\n",coords)); rc = -1; } /* Compute 
chromosomal coordinates */ *chrstart = left; *chrend = *chrstart + *genomiclength; *chrstart += 1U; /* Make 1-based */ /* Get chromosomal information */ if ((theindex = IIT_find_one(chromosome_iit,*div)) < 0) { fprintf(stderr,"Cannot find chromosome %s in chromosome IIT file\n",*div); /* exit(9); */ } else { interval = IIT_interval(chromosome_iit,theindex); *chroffset = Interval_low(interval); *chrlength = Interval_length(interval); } IIT_free(&chromosome_iit); #if 0 /* Contig IIT's are of type 1, which require some work to compute on current div-based scheme. Just abandoning for now. */ if (rc != 0) { /* Try contig */ filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+ strlen(".contig.iit")+1,sizeof(char)); sprintf(filename,"%s/%s.contig.iit",genomesubdir,fileroot); contig_iit = IIT_read(filename,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL, /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true); FREE(filename); debug(printf("Interpreting segment %s as a contig\n",*div)); if (coords == NULL) { debug(printf(" entire contig\n")); rc = translate_contig_universal(&(*genomicstart),&(*genomiclength),*div,left=0,length=0,chromosome_iit); } else if (isnumberp(&result,coords)) { debug(printf(" and coords %s as a number\n",coords)); rc = translate_contig(&(*genomicstart),&(*genomiclength),*div,left=result-1,length=1,contig_iit); } else if (isrange(&left,&length,&(*revcomp),coords)) { debug(printf(" and coords %s as a range starting at %u with length %u and revcomp = %d\n", coords,left,length,*revcomp)); rc = translate_contig(&(*genomicstart),&(*genomiclength),*div,left,length,contig_iit); } else { debug(printf(" but coords %s is neither a number nor a range\n",coords)); rc = -1; } IIT_free(&contig_iit); } #endif if (rc != 0) { fprintf(stderr,"Can't find coordinates %s:%s\n",*div,coords); return false; } else { return true; } } else { /* Query must be a genomic position, genomic range, or contig */ *chrstart = *chroffset = *chrlength = 0; 
debug(printf("Parsed query %s as atomic ",query)); if (isnumberp(&result,query)) { debug(printf("number\n")); *genomicstart = result-1; *genomiclength = 1; } else if (isrange(&left,&length,&(*revcomp),query)) { debug(printf("range\n")); *genomicstart = left; *genomiclength = length; } else { debug(printf("contig\n")); return false; #if 0 filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+ strlen(".contig.iit")+1,sizeof(char)); sprintf(filename,"%s/%s.contig.iit",genomesubdir,fileroot); contig_iit = IIT_read(filename,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL, /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true); FREE(filename); rc = translate_contig(&(*genomicstart),&(*genomiclength),query,left=0,length=0,contig_iit); IIT_free(&contig_iit); #endif } *div = convert_to_chrpos(&(*chrstart),genomesubdir,fileroot,*genomicstart); *chrend = *chrstart + *genomiclength; *chrstart += 1U; /* Make 1-based */ /* Try chromosome first */ filename = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+ strlen(".chromosome.iit")+1,sizeof(char)); sprintf(filename,"%s/%s.chromosome.iit",genomesubdir,fileroot); chromosome_iit = IIT_read(filename,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL, /*divstring*/NULL,/*add_iit_p*/false,/*labels_read_p*/true); FREE(filename); if ((theindex = IIT_find_one(chromosome_iit,*div)) < 0) { fprintf(stderr,"Cannot find chromosome %s in chromosome IIT file\n",*div); IIT_free(&chromosome_iit); return false; } else { interval = IIT_interval(chromosome_iit,theindex); *chroffset = Interval_low(interval); *chrlength = Interval_length(interval); IIT_free(&chromosome_iit); return true; } } }