/*
 * Allocate (via gt_malloc) and return a table of numofchars counters, all
 * starting at zero. The caller receives the table and is responsible for it.
 *
 * For every special range delivered by the special range iterator one
 * neighbouring character is inspected and, if it is not special, its
 * counter is incremented:
 *   - reverse directions (ISDIRREVERSE): the character at range.rightpos,
 *     provided rightpos < totallength, read in Forwardmode when readmode is
 *     Reversemode and in Complementmode otherwise;
 *   - all other directions: the character at range.leftpos-1, provided
 *     leftpos > 0, read in readmode.
 * Finally, when the length of the special suffix is 0, the character at
 * position totallength-1 (read in readmode) is counted as well; it is
 * asserted to be non-special.
 */
static Seqpos *leftcontextofspecialchardist(unsigned int numofchars,
                                            const Encodedsequence *encseq,
                                            Readmode readmode)
{
  GtUchar cc;
  unsigned int charnum;
  Seqpos *dist;
  Seqpos totallength = getencseqtotallength(encseq);

  dist = gt_malloc(sizeof (*dist) * numofchars);
  for (charnum = 0; charnum < numofchars; charnum++)
  {
    dist[charnum] = 0;
  }
  if (hasspecialranges(encseq))
  {
    Sequencerange range;
    Specialrangeiterator *sri = newspecialrangeiterator(encseq,true);

    if (ISDIRREVERSE(readmode))
    {
      Readmode thismode;

      /* the neighbour is fetched with the non-reversed counterpart */
      if (readmode == Reversemode)
      {
        thismode = Forwardmode;
      } else
      {
        thismode = Complementmode;
      }
      while (nextspecialrangeiterator(&range,sri))
      {
        if (range.rightpos >= totallength)
        {
          continue; /* nothing stored at rightpos */
        }
        cc = getencodedchar(encseq,range.rightpos,thismode);
        if (ISNOTSPECIAL(cc))
        {
          dist[cc]++;
        }
      }
    } else
    {
      while (nextspecialrangeiterator(&range,sri))
      {
        gt_assert(range.leftpos < totallength);
        if (range.leftpos > 0)
        {
          cc = getencodedchar(encseq,range.leftpos-1,readmode);
          if (ISNOTSPECIAL(cc))
          {
            dist[cc]++;
          }
        }
      }
    }
    freespecialrangeiterator(&sri);
  }
  if (getencseqlengthofspecialsuffix(encseq) == 0)
  {
    /* no special suffix: the last character is counted as well */
    cc = getencodedchar(encseq,totallength-1,readmode);
    gt_assert(ISNOTSPECIAL(cc));
    dist[cc]++;
  }
  return dist;
}
/*
 * Consistency check of the two access methods, starting at startpos:
 * the scan state esr is initialised for (encseq, readmode, startpos) and
 * then, for every position from startpos up to totallength-1, the character
 * obtained by random access is compared with the character obtained by
 * the sequential reader. On the first mismatch a diagnostic is written to
 * stderr and the program exits with GT_EXIT_PROGRAMMING_ERROR.
 */
static void runscanatpostrial(const Encodedsequence *encseq,
                              Encodedsequencescanstate *esr,
                              Readmode readmode,
                              Seqpos startpos)
{
  Seqpos totallength = getencseqtotallength(encseq);
  Seqpos pos = startpos;

  initEncodedsequencescanstate(esr,encseq,readmode,startpos);
  while (pos < totallength)
  {
    /* random access first, then the sequential reader */
    GtUchar ccra = getencodedchar(encseq,pos,readmode);
    GtUchar ccsr = sequentialgetencodedchar(encseq,esr,pos,readmode);

    if (ccra != ccsr)
    {
      fprintf(stderr,"startpos = " FormatSeqpos " access=%s, mode=%s: position=" FormatSeqpos ": random access (correct) = %u != %u = " " sequential read (wrong)\n",
              PRINTSeqposcast(startpos),
              encseqaccessname(encseq),
              showreadmode(readmode),
              PRINTSeqposcast(pos),
              (unsigned int) ccra,
              (unsigned int) ccsr);
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
    pos++;
  }
}
/*
 * Walk upwards from idx while idx is below targetptr[source]. For every
 * stored start position p > 0 the character at p-1 is fetched (in the
 * read mode of bucketspec2). If that character is not special and its
 * superbucket is not marked as sorted, p-1 is written to the slot that
 * targetptr[cc] points to and targetptr[cc] is moved one slot up.
 *
 * The upper bound targetptr[source] is re-read in every iteration, since
 * the loop body itself may advance it (when cc equals source).
 * idx must initially be below targetptr[source] (asserted).
 */
static void forwardderive(const GtBucketspec2 *bucketspec2,
                          Seqpos **targetptr,
                          unsigned int source,
                          Seqpos *idx)
{
  gt_assert (idx < targetptr[source]);
  while (idx < targetptr[source])
  {
    Seqpos startpos = *idx;

    if (startpos > 0)
    {
      GtUchar cc = getencodedchar(bucketspec2->encseq,
                                  startpos-1,
                                  bucketspec2->readmode);

      if (ISNOTSPECIAL(cc) && !bucketspec2->superbuckettab[cc].sorted)
      {
        /* store, then advance the insertion pointer of character cc */
        *(targetptr[cc]++) = startpos - 1;
      }
    }
    idx++;
  }
}
/*
 * Initialise eqsvector from the sequence useq of length ulen (alphabet
 * size alphasize) via initeqsvector, then pass the encoded characters at
 * positions vstartpos .. vstartpos+vlen-1 of encseq, read in Forwardmode,
 * one by one to COMPUTENEWDIST. The value of distval after the last
 * character is returned.
 *
 * NOTE(review): distval and the remaining working variables are presumably
 * declared by DECLARELOCALVARS and updated by COMPUTENEWDIST; both macros
 * are defined outside this view. The local names are kept unchanged
 * because the macros may refer to them.
 */
unsigned long distanceofshortstringsencseq(unsigned long *eqsvector,
                                           unsigned int alphasize,
                                           const GtUchar *useq,
                                           unsigned long ulen,
                                           const Encodedsequence *encseq,
                                           Seqpos vstartpos,
                                           Seqpos vlen)
{
  DECLARELOCALVARS;
  GtUchar cc;
  Seqpos pos;

  initeqsvector(eqsvector,(unsigned long) alphasize,useq,ulen);
  for (pos = vstartpos; pos < vstartpos + vlen; pos++)
  {
    cc = getencodedchar(encseq,pos,Forwardmode);
    COMPUTENEWDIST(cc);
  }
  return distval;
}
/*
 * Walk downwards from idx while idx is above targetptr[source]. For every
 * stored start position p > 0 the character at p-1 is fetched (in the
 * read mode of bucketspec2). If that character is not special and its
 * superbucket is not marked as sorted, p-1 is written to the slot that
 * targetptr[cc] points to and targetptr[cc] is moved one slot down.
 *
 * The lower bound targetptr[source] is re-read in every iteration, since
 * the loop body itself may lower it (when cc equals source).
 * idx must initially be above targetptr[source] (asserted).
 */
static void backwardderive(const GtBucketspec2 *bucketspec2,
                           Seqpos **targetptr,
                           unsigned int source,
                           Seqpos *idx)
{
  gt_assert (idx > targetptr[source]);
  while (idx > targetptr[source])
  {
    Seqpos startpos = *idx;

    if (startpos > 0)
    {
      GtUchar cc = getencodedchar(bucketspec2->encseq,
                                  startpos-1,
                                  bucketspec2->readmode);

      if (ISNOTSPECIAL(cc) && !bucketspec2->superbuckettab[cc].sorted)
      {
        /* store, then lower the insertion pointer of character cc */
        *(targetptr[cc]--) = startpos - 1;
      }
    }
    idx--;
  }
}
/*
 * Initialise eqsvector from the sequence useq of length ulen (alphabet
 * size alphasize), then read encseq in Forwardmode beginning at startpos
 * and pass each character to COMPUTENEWDIST. Scanning stops as soon as
 * distval <= maxdistance or the last position (totallength-1) has been
 * processed; the result is then defined and holds the number of
 * characters read (pos - startpos + 1).
 *
 * If nowildcards is set and a WILDCARD character is read, the result is
 * undefined (defined = false, value 0).
 *
 * maxdistance must be positive (asserted); the number of characters read
 * before the current one is asserted not to exceed ulen + maxdistance.
 *
 * NOTE(review): distval is presumably declared by DECLARELOCALVARS and
 * updated by COMPUTENEWDIST; both macros are defined outside this view.
 * The local names are kept unchanged because the macros may refer to them.
 */
Definedunsignedlong forwardprefixmatch(const Encodedsequence *encseq,
                                       unsigned int alphasize,
                                       Seqpos startpos,
                                       bool nowildcards,
                                       unsigned long *eqsvector,
                                       const GtUchar *useq,
                                       unsigned long ulen,
                                       unsigned long maxdistance)
{
  DECLARELOCALVARS;
  Seqpos pos, totallength = getencseqtotallength(encseq);
  GtUchar cc;
  Definedunsignedlong result;

  initeqsvector(eqsvector,(unsigned long) alphasize,useq,ulen);
  gt_assert(maxdistance > 0);
  for (pos = startpos; /* Nothing */; pos++)
  {
    gt_assert(pos - startpos <= (Seqpos) (ulen + maxdistance));
    cc = getencodedchar(encseq,pos,Forwardmode);
    if (nowildcards && cc == (GtUchar) WILDCARD)
    {
      /* wildcards are not tolerated: report an undefined result */
      result.valueunsignedlong = 0;
      result.defined = false;
      return result;
    }
    COMPUTENEWDIST(cc);
    if (distval <= maxdistance)
    {
      break; /* distance threshold reached */
    }
    if (pos == totallength-1)
    {
      break; /* last position of the sequence processed */
    }
  }
  result.defined = true;
  result.valueunsignedlong = (unsigned long) (pos - startpos + 1);
  return result;
}
/*
 * Full-length consistency check of an encoded sequence.
 *
 * For every position the character obtained by random access is compared
 * with the character delivered by the sequential reader. If filenametab
 * is not NULL and readmode is Forwardmode, the characters are additionally
 * compared with those read from the original files through a sequence
 * buffer; in that case the scan ends when the buffer is exhausted,
 * otherwise it ends at totallength. After an error-free scan the number
 * of positions visited must equal totallength.
 *
 * Returns 0 on success and -1 on failure; on failure err is set (either
 * by this function or by the failing sequence buffer call).
 *
 * Changes with respect to the previous version:
 *  - esr starts out as NULL and is only released if it was created;
 *    previously an uninitialised pointer was passed to
 *    freeEncodedsequencescanstate when the sequence buffer could not be
 *    created.
 *  - gt_progressbar_stop is called on every path. The progress bar is
 *    given the address of the local counter fullscanpbar, so it must not
 *    remain active after this function has returned.
 *  - values printed with FormatSeqpos are passed through PRINTSeqposcast.
 */
static int testfullscan(const GtStrArray *filenametab,
                        const Encodedsequence *encseq,
                        Readmode readmode,
                        GtError *err)
{
  Seqpos pos = 0, totallength;
  GtUchar ccscan = 0, ccra, ccsr;
  GtSequenceBuffer *fb = NULL;
  int retval;
  bool haserr = false;
  Encodedsequencescanstate *esr = NULL;
  unsigned long long fullscanpbar = 0;

  gt_error_check(err);
  totallength = getencseqtotallength(encseq);
  gt_progressbar_start(&fullscanpbar,(unsigned long long) totallength);
  if (filenametab != NULL)
  {
    fb = gt_sequence_buffer_new_guess_type((GtStrArray*) filenametab, err);
    if (!fb)
    {
      haserr = true;
    }
    if (!haserr)
    {
      gt_sequence_buffer_set_symbolmap(fb, getencseqAlphabetsymbolmap(encseq));
    }
  }
  if (!haserr)
  {
    esr = newEncodedsequencescanstate();
    initEncodedsequencescanstate(esr,encseq,readmode,0);
    for (pos=0; /* Nothing */; pos++)
    {
      if (filenametab != NULL && readmode == Forwardmode)
      {
        /* the file scan determines where the sequence ends */
        retval = gt_sequence_buffer_next(fb,&ccscan,err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
      } else
      {
        if (pos >= totallength)
        {
          break;
        }
      }
      ccra = getencodedchar(encseq,pos,readmode); /* Random access */
      if (filenametab != NULL && readmode == Forwardmode)
      {
        if (ccscan != ccra)
        {
          gt_error_set(err,"access=%s, position=" FormatSeqpos ": scan (readnextchar) = %u != " "%u = random access",
                       encseqaccessname(encseq),
                       PRINTSeqposcast(pos),
                       (unsigned int) ccscan,
                       (unsigned int) ccra);
          haserr = true;
          break;
        }
      }
      ccsr = sequentialgetencodedchar(encseq,esr,pos,readmode);
      if (ccra != ccsr)
      {
        gt_error_set(err,"access=%s, mode=%s: position=" FormatSeqpos ": random access = %u != %u = sequential read",
                     encseqaccessname(encseq),
                     showreadmode(readmode),
                     PRINTSeqposcast(pos),
                     (unsigned int) ccra,
                     (unsigned int) ccsr);
        haserr = true;
        break;
      }
      fullscanpbar++;
    }
  }
  /* always pair the start above with a stop: the bar refers to a local */
  gt_progressbar_stop();
  if (!haserr)
  {
    if (pos != totallength)
    {
      gt_error_set(err,"sequence length must be " FormatSeqpos " but is " FormatSeqpos,
                   PRINTSeqposcast(totallength),
                   PRINTSeqposcast(pos));
      haserr = true;
    }
  }
  if (esr != NULL)
  {
    freeEncodedsequencescanstate(&esr);
  }
  gt_sequence_buffer_delete(fb);
  return haserr ? -1 : 0;
}
/*
 * Print one record describing the LTR pair in boundaries to stdout, as a
 * single line of blank-separated fields:
 *   - start, end and length of the whole element (leftLTR_5 .. rightLTR_3),
 *   - start, end and length of the left LTR,
 *   - if lo->minlengthTSD > 1: the lenleftTSD characters that precede
 *     leftLTR_5, followed by lenleftTSD,
 *   - if lo->motif.allowedmismatches < 4: the first two and the last two
 *     characters of the left LTR in the form "xx..yy",
 *   - the same group of fields for the right LTR (its TSD characters are
 *     those that follow rightLTR_3),
 *   - the similarity with two decimals and the contig number.
 * Start and end positions are printed relative to offset and increased
 * by 1. Characters are read from encseq in Forwardmode and translated
 * through the alphabet's character table.
 *
 * Change with respect to the previous version: the string literal printed
 * after the right TSD length contained a raw line break, which is not
 * valid inside a C string literal. It now ends with the same single blank
 * separator that is used after the left TSD length.
 * NOTE(review): the separator was chosen for consistency with the left
 * TSD field; confirm against the expected output format.
 */
static void producelongutput(const LTRharvestoptions *lo,
                             const LTRboundaries *boundaries,
                             const Encodedsequence *encseq,
                             Seqpos offset)
{
  const GtUchar *characters = getencseqAlphabetcharacters(encseq);

  /* whole element: start, end, length */
  printf(FormatSeqpos " ",
         PRINTSeqposcast(boundaries->leftLTR_5 -offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast(boundaries->rightLTR_3 -offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast((boundaries->rightLTR_3
                          - boundaries->leftLTR_5 + 1)));

  /* left LTR: start, end, length */
  printf(FormatSeqpos " ",
         PRINTSeqposcast(boundaries->leftLTR_5 -offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast(boundaries->leftLTR_3 -offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast((boundaries->leftLTR_3
                          - boundaries->leftLTR_5 + 1)));
  if (lo->minlengthTSD > 1U)
  {
    Seqpos j;

    /* the lenleftTSD characters directly in front of the left LTR */
    for (j = 0; j < boundaries->lenleftTSD; j++)
    {
      printf("%c",(char) characters[getencodedchar(encseq,
                                      boundaries->leftLTR_5 -
                                      boundaries->lenleftTSD + j,
                                      Forwardmode)]);
    }
    printf(" " FormatSeqpos " ", PRINTSeqposcast(boundaries->lenleftTSD));
  }
  if (lo->motif.allowedmismatches < 4U)
  {
    /* first two and last two characters of the left LTR */
    printf("%c%c..%c%c ",
           (char) characters[getencodedchar(encseq,
                                            boundaries->leftLTR_5,
                                            Forwardmode)],
           (char) characters[getencodedchar(encseq,
                                            boundaries->leftLTR_5+1,
                                            Forwardmode)],
           (char) characters[getencodedchar(encseq,
                                            boundaries->leftLTR_3-1,
                                            Forwardmode)],
           (char) characters[getencodedchar(encseq,
                                            boundaries->leftLTR_3,
                                            Forwardmode)]);
  }

  /* right LTR: start, end, length */
  printf(FormatSeqpos " ",
         PRINTSeqposcast(boundaries->rightLTR_5 -offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast(boundaries->rightLTR_3 -offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast(boundaries->rightLTR_3
                         - boundaries->rightLTR_5 + 1));
  if (lo->minlengthTSD > 1U)
  {
    Seqpos j;

    /* the lenrightTSD characters directly behind the right LTR */
    for (j = 0; j < boundaries->lenrightTSD; j++)
    {
      printf("%c",(char) characters[getencodedchar(encseq,
                                      boundaries->rightLTR_3+j+1,
                                      Forwardmode)]);
    }
    printf(" " FormatSeqpos " ", PRINTSeqposcast(boundaries->lenrightTSD));
  }
  if (lo->motif.allowedmismatches < 4U)
  {
    /* first two and last two characters of the right LTR */
    printf("%c%c..%c%c",
           (char) characters[getencodedchar(encseq,
                                            boundaries->rightLTR_5,
                                            Forwardmode)],
           (char) characters[getencodedchar(encseq,
                                            boundaries->rightLTR_5+1,
                                            Forwardmode)],
           (char) characters[getencodedchar(encseq,
                                            boundaries->rightLTR_3-1,
                                            Forwardmode)],
           (char) characters[getencodedchar(encseq,
                                            boundaries->rightLTR_3,
                                            Forwardmode)]);
  }
  /* print similarity */
  printf(" %.2f", boundaries->similarity);
  /* print sequence number */
  printf(" %lu\n", boundaries->contignumber);
}