/*
 * Builds a table with one counter per ordinary character (numofchars
 * entries). The table is allocated with gt_malloc and returned; it is not
 * freed here.
 *
 * For every special range of encseq the neighbouring character is fetched
 * and, if it is not special, its counter is incremented:
 *   - if ISDIRREVERSE(readmode) holds: the character at range.rightpos,
 *     read in Forwardmode (for Reversemode) or Complementmode (otherwise);
 *   - otherwise: the character at range.leftpos - 1, read in readmode.
 * Afterwards, if the sequence has no special suffix, the character at the
 * last position (read in readmode) is counted as well.
 */
static Seqpos *leftcontextofspecialchardist(unsigned int numofchars,
                                            const Encodedsequence *encseq,
                                            Readmode readmode)
{
  GtUchar cc;
  unsigned int charidx = 0;
  Seqpos *dist, totallength = getencseqtotallength(encseq);

  dist = gt_malloc(sizeof (*dist) * numofchars);
  while (charidx < numofchars)
  {
    dist[charidx] = 0;
    charidx++;
  }
  if (hasspecialranges(encseq))
  {
    Sequencerange range;
    Specialrangeiterator *sri = newspecialrangeiterator(encseq,true);

    if (ISDIRREVERSE(readmode))
    {
      Readmode thismode = (readmode == Reversemode) ? Forwardmode
                                                    : Complementmode;

      while (nextspecialrangeiterator(&range,sri))
      {
        /* a range ending at the sequence end has no neighbour to count */
        if (range.rightpos < totallength)
        {
          cc = getencodedchar(encseq,range.rightpos,thismode);
          if (ISNOTSPECIAL(cc))
          {
            dist[cc]++;
          }
        }
      }
    } else
    {
      while (nextspecialrangeiterator(&range,sri))
      {
        gt_assert(range.leftpos < totallength);
        /* a range starting at position 0 has no left neighbour */
        if (range.leftpos > 0)
        {
          cc = getencodedchar(encseq,range.leftpos-1,readmode);
          if (ISNOTSPECIAL(cc))
          {
            dist[cc]++;
          }
        }
      }
    }
    freespecialrangeiterator(&sri);
  }
  if (getencseqlengthofspecialsuffix(encseq) == 0)
  {
    /* no special suffix: the last character is counted too */
    cc = getencodedchar(encseq,totallength-1,readmode);
    gt_assert(ISNOTSPECIAL(cc));
    dist[cc]++;
  }
  return dist;
}
/*
 * Walks the suffix sort space downwards from idx while
 * idx + 1 > targetoffset[source] + 1. For each suffix start position
 * found there that is greater than 0, the preceding character cc is looked
 * up; if cc is not special and its superbucket is not yet marked as sorted,
 * startpos - 1 is stored at targetoffset[cc] and that offset is decremented.
 *
 * targetoffset[source] is re-read on every iteration because the loop body
 * may modify entries of targetoffset.
 */
static void backwardderive(const GtBucketspec2 *bucketspec2,
                           GtSuffixsortspace *suffixsortspace,
                           GtUword *targetoffset,
                           unsigned int source,
                           GtUword idx)
{
  /* NOTE(review): both sides are shifted by one, presumably so that the
     comparison also terminates when idx wraps around below 0 - confirm */
  while (idx + 1 > targetoffset[source] + 1)
  {
    GtUword startpos = gt_suffixsortspace_getdirect(suffixsortspace,idx);

    if (startpos > 0)
    {
      GtUchar cc = gt_encseq_get_encoded_char(bucketspec2->encseq,
                                              startpos-1,
                                              bucketspec2->readmode);

      if (ISNOTSPECIAL(cc) && !bucketspec2->superbuckettab[cc].sorted)
      {
        gt_suffixsortspace_setdirect(suffixsortspace,targetoffset[cc],
                                     startpos - 1);
        targetoffset[cc]--;
      }
    }
    idx--;
  }
}
/*
 * Computes the code of the prefixindex characters starting at pos, beginning
 * with ecp->filltable[prefixindex] and adding ecp->multimappower[i][cc] for
 * the i-th character cc.
 *
 * Returns false as soon as the running value exceeds stopcode (checked
 * before the first character and after every addition); *code is left
 * untouched in that case. Otherwise the final value is stored in *code and
 * true is returned.
 *
 * Requires 0 < prefixindex < ecp->prefixlength; all characters read are
 * asserted to be non-special and inside the sequence.
 */
bool gt_Enumcodeatposition_filledqgramcodestopatmax(
                                        GtCodetype *code,
                                        const Enumcodeatposition *ecp,
                                        unsigned int prefixindex,
                                        unsigned long pos,
                                        GtCodetype stopcode)
{
  GtCodetype running;
  unsigned int offset;

  gt_assert(prefixindex > 0 && prefixindex < ecp->prefixlength);
  running = ecp->filltable[prefixindex];
  if (running > stopcode)
  {
    return false;
  }
  for (offset = 0; offset < prefixindex; offset++)
  {
    GtUchar cc;

    gt_assert((unsigned long) (pos + offset) < ecp->totallength);
    cc = gt_encseq_get_encoded_char_nospecial(ecp->encseq,
                                              pos + offset,
                                              ecp->readmode);
    gt_assert(ISNOTSPECIAL(cc));
    running += ecp->multimappower[offset][cc];
    if (running > stopcode)
    {
      return false;
    }
  }
  *code = running;
  return true;
}
/*
 * Starting with a match of the given length at startpos, extends it one
 * character at a time to the right. For every extension length ulen in
 * [length, maxmersize] that is at least minmersize, one occurrence is added
 * to distribution under key ulen.
 *
 * The extension stops when ulen exceeds maxmersize, when the position
 * startpos + ulen - 1 reaches totallength, or when the character at that
 * position is special. Nothing happens if length > maxmersize.
 */
static void iteritvdistribution(GtArrayuint64_t *distribution,
                                const GtEncseq *encseq,
                                GtReadmode readmode,
                                unsigned long totallength,
                                unsigned long minmersize,
                                unsigned long maxmersize,
                                unsigned long length,
                                unsigned long startpos)
{
  unsigned long ulen, pos;

  if (length > maxmersize)
  {
    return;
  }
  ulen = length;
  pos = startpos + length - 1; /* last position covered by the match */
  while (ulen <= maxmersize &&
         pos < totallength &&
         ISNOTSPECIAL(gt_encseq_get_encoded_char(encseq,pos,readmode)))
  {
    if (ulen >= minmersize)
    {
      adddistributionuint64_t(distribution,ulen,1UL);
    }
    pos++;
    ulen++;
  }
}
/*
 * Scans the suffix positions from idx upwards until targetptr[source] is
 * reached. For every position startpos > 0 the preceding character cc is
 * looked up; if cc is not special and its superbucket is not yet marked as
 * sorted, startpos - 1 is written to *targetptr[cc] and that pointer is
 * advanced.
 *
 * targetptr[source] is re-read on every iteration because the loop body may
 * advance entries of targetptr.
 */
static void forwardderive(const GtBucketspec2 *bucketspec2,
                          Seqpos **targetptr,
                          unsigned int source,
                          Seqpos *idx)
{
  gt_assert (idx < targetptr[source]);
  while (idx < targetptr[source])
  {
    Seqpos startpos = *idx;

    if (startpos > 0)
    {
      GtUchar cc = getencodedchar(bucketspec2->encseq,startpos-1,
                                  bucketspec2->readmode);

      /* cc is only used as an index after it is known to be non-special */
      if (ISNOTSPECIAL(cc) && !bucketspec2->superbuckettab[cc].sorted)
      {
        *(targetptr[cc]) = startpos - 1;
        targetptr[cc]++;
      }
    }
    idx++;
  }
}
/*
 * Updates the state of spwp for the next character charcode.
 *
 * Step 1 (only if doshift is set and the queue spwp->spos is not empty):
 * if the head item still has a positive distvalue, SUBTRACTLCHARANDSHIFT is
 * applied to its codeforleftcontext with lchar and distvalue is
 * decremented. Otherwise the head is removed and, if another item remains,
 * the distvalue of the new head is decremented.
 *
 * Step 2: an ordinary character is merged into codewithoutspecial, either
 * via SUBTRACTLCHARSHIFTADDNEXT when kmersize characters have already been
 * collected, or by adding its multimappower entry and incrementing
 * lengthwithoutspecial. A special character enqueues a new item carrying
 * the current codewithoutspecial (after SUBTRACTLCHARANDSHIFT if kmersize
 * characters were collected) and resets length and code to 0.
 */
static void updatespecialpositions(Kmerstream *spwp,
                                   GtUchar charcode,
                                   bool doshift,
                                   GtUchar lchar)
{
  if (doshift && !specialqueueisempty(&spwp->spos))
  {
    Specialitem *head = specialheadofqueue(&spwp->spos);

    if (head->distvalue > 0)
    {
      SUBTRACTLCHARANDSHIFT(head->codeforleftcontext,lchar,spwp->numofchars,
                            spwp->multimappower[0]);
      head->distvalue--;
    } else
    {
      /* the head item is used up: drop it and age its successor, if any */
      specialdeleteheadofqueue(&spwp->spos);
      if (!specialqueueisempty(&spwp->spos))
      {
        head = specialheadofqueue(&spwp->spos);
        head->distvalue--;
      }
    }
  }
  if (ISNOTSPECIAL(charcode))
  {
    if (spwp->lengthwithoutspecial == spwp->kmersize)
    {
      SUBTRACTLCHARSHIFTADDNEXT(spwp->codewithoutspecial,
                                lchar,
                                spwp->numofchars,
                                spwp->multimappower[0],
                                charcode);
    } else
    {
      spwp->codewithoutspecial
        += spwp->multimappower[spwp->lengthwithoutspecial][charcode];
      spwp->lengthwithoutspecial++;
    }
  } else
  {
    /* this is the only place where an item is added to the queue */
    Specialitem newelem;

    /* distvalue is fixed before codewithoutspecial is shifted below */
    if (specialqueueisempty(&spwp->spos))
    {
      newelem.distvalue = spwp->windowwidth-1U;
    } else
    {
      newelem.distvalue = spwp->lengthwithoutspecial+1U;
    }
    if (spwp->lengthwithoutspecial == spwp->kmersize)
    {
      SUBTRACTLCHARANDSHIFT(spwp->codewithoutspecial,lchar,
                            spwp->numofchars,spwp->multimappower[0]);
    }
    newelem.codeforleftcontext = spwp->codewithoutspecial;
    specialenqueue(&spwp->spos, newelem);
    spwp->lengthwithoutspecial = 0;
    spwp->codewithoutspecial = 0;
  }
}
/*
 * Updates the state of spwp for the next character charcode.
 *
 * Step 1 (only if doshift is set and spwp->specialqueue is not empty):
 * if the head context still has a positive lengthofleftcontext,
 * SUBTRACTLCHARANDSHIFT is applied to its codeofleftcontext with leftchar
 * and the length is decremented; otherwise the head is removed.
 *
 * Step 2: an ordinary character is merged into codewithoutspecial, either
 * via SUBTRACTLCHARSHIFTADDNEXT when kmersize characters have already been
 * collected, or by adding its multimappower entry and incrementing
 * lengthwithoutspecial. A special character enqueues a new context holding
 * the current codewithoutspecial (after SUBTRACTLCHARANDSHIFT if kmersize
 * characters were collected) and resets length and code to 0.
 */
static void kmerstream_updatespecialpositions(GtKmerstream *spwp,
                                              GtUchar charcode,
                                              bool doshift,
                                              GtUchar leftchar)
{
  unsigned int leftcontextlen;

  if (doshift && !special_queue_is_empty(&spwp->specialqueue))
  {
    GtSpecialcontext *head = special_queue_head_get(&spwp->specialqueue);

    if (head->lengthofleftcontext > 0)
    {
      SUBTRACTLCHARANDSHIFT(head->codeofleftcontext,leftchar,
                            spwp->numofchars,spwp->multimappower[0]);
      head->lengthofleftcontext--;
    } else
    {
      special_queueu_head_delete(&spwp->specialqueue);
    }
  }
  if (ISNOTSPECIAL(charcode))
  {
    if (spwp->lengthwithoutspecial == spwp->kmersize)
    {
      SUBTRACTLCHARSHIFTADDNEXT(spwp->codewithoutspecial,
                                leftchar,
                                spwp->numofchars,
                                spwp->multimappower[0],
                                charcode);
    } else
    {
      spwp->codewithoutspecial
        += spwp->multimappower[spwp->lengthwithoutspecial][charcode];
      spwp->lengthwithoutspecial++;
    }
    return;
  }
  /* special character: this is the only place where the queue grows.
     The length is fixed before codewithoutspecial is shifted below. */
  leftcontextlen = special_queue_is_empty(&spwp->specialqueue)
                     ? (spwp->windowwidth - 1U)
                     : spwp->lengthwithoutspecial;
  if (spwp->lengthwithoutspecial == spwp->kmersize)
  {
    SUBTRACTLCHARANDSHIFT(spwp->codewithoutspecial,
                          leftchar,
                          spwp->numofchars,
                          spwp->multimappower[0]);
  }
  gt_assert(leftcontextlen < spwp->kmersize);
  special_queue_enqueue(&spwp->specialqueue,
                        leftcontextlen,
                        spwp->codewithoutspecial);
  spwp->lengthwithoutspecial = 0;
  spwp->codewithoutspecial = 0;
}
/*
 * Maps an encoded sequence symbol to a wavelet tree symbol.
 *
 * Ordinary symbols map to themselves. The special symbols map to the three
 * values just below alpha_size:
 *   SEPARATOR -> alpha_size - 1
 *   WILDCARD  -> alpha_size - 2
 *   UNDEFCHAR -> alpha_size - 3 (asserted; any other value also ends here
 *                                when assertions are disabled)
 */
static inline GtWtreeSymbol gt_wtree_encseq_map(GtWtreeEncseq *wtree_encseq,
                                                GtUchar symbol)
{
  if (ISNOTSPECIAL(symbol))
  {
    return (GtWtreeSymbol) symbol;
  }
  if (symbol == (GtUchar) SEPARATOR)
  {
    return (GtWtreeSymbol) wtree_encseq->alpha_size - 1;
  }
  if (symbol == (GtUchar) WILDCARD)
  {
    return (GtWtreeSymbol) wtree_encseq->alpha_size - 2;
  }
  gt_assert(symbol == (GtUchar) UNDEFCHAR);
  return (GtWtreeSymbol) wtree_encseq->alpha_size - 3;
}
/*
 * Computes the score of alignment under a simple scoring scheme.
 *
 * The edit operations are processed from the last list entry to the first.
 * For Mismatch/Match/Replacement entries each aligned column contributes
 * matchscore if both characters are equal and not special, and
 * mismatchscore otherwise. Deletion and Insertion entries contribute
 * gapscore per step and advance only the position in u or v respectively.
 *
 * alignment must be non-NULL and valid (both asserted).
 */
GtWord gt_alignment_eval_with_score(const GtAlignment *alignment,
                                    GtWord matchscore,
                                    GtWord mismatchscore,
                                    GtWord gapscore)
{
  GtUword remaining, step, upos = 0, vpos = 0;
  GtWord sumscore = 0;
  GtMultieop *meop;

  gt_assert(alignment != NULL);
  gt_assert(gt_alignment_is_valid(alignment));

  remaining = gt_multieoplist_get_length(alignment->eops);
  while (remaining > 0)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, remaining - 1);
    switch (meop->type)
    {
      case Mismatch:
      case Match:
      case Replacement:
        for (step = 0; step < meop->steps; step++)
        {
          /* equal special characters do not count as a match */
          sumscore += (alignment->u[upos] == alignment->v[vpos] &&
                       ISNOTSPECIAL(alignment->u[upos]))
                        ? matchscore
                        : mismatchscore;
          upos++;
          vpos++;
        }
        break;
      case Deletion:
        sumscore += gapscore * meop->steps;
        upos += meop->steps;
        break;
      case Insertion:
        sumscore += gapscore * meop->steps;
        vpos += meop->steps;
        break;
    }
    remaining--;
  }
  return sumscore;
}
/*
 * Writes the wlen characters of encseq starting at position start to fpout,
 * read in forward direction and echoed through the alphabet of encseq.
 * Every character in the window is asserted to be non-special.
 */
void gt_fprintfencseq(FILE *fpout,
                      const GtEncseq *encseq,
                      unsigned long start,
                      unsigned long wlen)
{
  const GtAlphabet *alpha = gt_encseq_alphabet(encseq);
  const unsigned long end = start + wlen;
  unsigned long pos;

  for (pos = start; pos < end; pos++)
  {
    GtUchar cc = gt_encseq_get_encoded_char(encseq, pos,
                                            GT_READMODE_FORWARD);

    gt_assert(ISNOTSPECIAL(cc));
    gt_alphabet_echo_pretty_symbol(alpha,fpout,cc);
  }
}
/*
 * Converts the sequence stored in bare_encseq in place.
 *
 *   forward && !direct : complement every non-special character
 *   !forward && direct : reverse the sequence
 *   !forward && !direct: reverse the sequence and complement every
 *                        non-special character (special characters are
 *                        moved but left unchanged)
 *
 * forward together with direct is not supported (asserted).
 *
 * The reversing variants work on the half-open range [leftptr, rightptr),
 * so no pointer before the start of the array is ever formed, not even for
 * sequences of length 0 or 1.
 */
void bare_encseq_convert(GtBareEncseq *bare_encseq,bool forward,bool direct)
{
  GtUchar *leftptr, *rightptr;

  if (forward)
  {
    gt_assert(!direct);
    rightptr = bare_encseq->sequence + bare_encseq->totallength;
    for (leftptr = bare_encseq->sequence; leftptr < rightptr; leftptr++)
    {
      if (ISNOTSPECIAL(*leftptr))
      {
        *leftptr = GT_COMPLEMENTBASE(*leftptr);
      }
    }
    return;
  }

  leftptr = bare_encseq->sequence;
  rightptr = leftptr + bare_encseq->totallength; /* one past the end */

  /* exchange the outermost pair while at least two characters remain */
  while (rightptr - leftptr >= 2)
  {
    GtUchar leftchar, rightchar;

    rightptr--;
    leftchar = *leftptr;
    rightchar = *rightptr;
    if (direct)
    {
      *leftptr = rightchar;
      *rightptr = leftchar;
    } else
    {
      *leftptr = ISSPECIAL(rightchar) ? rightchar
                                      : GT_COMPLEMENTBASE(rightchar);
      *rightptr = ISSPECIAL(leftchar) ? leftchar
                                      : GT_COMPLEMENTBASE(leftchar);
    }
    leftptr++;
  }

  /* odd length: the middle character keeps its place but must still be
     complemented in the reverse-complement case */
  if (!direct && rightptr - leftptr == 1)
  {
    GtUchar middle = *leftptr;

    *leftptr = ISSPECIAL(middle) ? middle : GT_COMPLEMENTBASE(middle);
  }
}
/*
 * Debug check for a reverse match; does nothing unless readmode is
 * GT_READMODE_REVERSE.
 *
 * pos2 is relative to the start of sequence seqnum2 and is converted to an
 * absolute position first. The function then asserts that
 *   - the len characters starting at pos1, read left to right, equal the
 *     len characters ending at pos2 + len - 1, read right to left, and none
 *     of them is special;
 *   - the match cannot be extended: the character following the first
 *     instance (pos1 + len) and the character preceding the second
 *     instance (pos2 - 1) differ or are special. Positions outside the
 *     sequence are treated as SEPARATOR.
 */
static void verifymatch(const GtEncseq *encseq,
                        GtUword len,
                        GtUword pos1,
                        uint64_t seqnum2,
                        GtUword pos2,
                        GtReadmode readmode)
{
  GtUword offset, totallength;
  GtUchar cc1, cc2;

  if (readmode != GT_READMODE_REVERSE)
  {
    return;
  }
  totallength = gt_encseq_total_length(encseq);
  pos2 += gt_encseq_seqstartpos(encseq, seqnum2);

  for (offset = 0; offset < len; offset++)
  {
    gt_assert(pos1 + len - 1 < totallength);
    gt_assert(pos2 + len - 1 < totallength);
    cc1 = gt_encseq_get_encoded_char(encseq,pos1+offset,
                                     GT_READMODE_FORWARD);
    cc2 = gt_encseq_get_encoded_char(encseq,pos2+len-1-offset,
                                     GT_READMODE_FORWARD);
    gt_assert(cc1 == cc2 && ISNOTSPECIAL(cc1));
  }

  /* character right of the first instance */
  cc1 = (pos1 + len < totallength)
          ? gt_encseq_get_encoded_char(encseq,pos1+len,GT_READMODE_FORWARD)
          : SEPARATOR;
  /* character left of the second instance */
  cc2 = (pos2 > 0)
          ? gt_encseq_get_encoded_char(encseq,pos2-1,GT_READMODE_FORWARD)
          : SEPARATOR;
  gt_assert(cc1 != cc2 || ISSPECIAL(cc1));
}
GtCodetype gt_Enumcodeatposition_filledqgramcode(const Enumcodeatposition *ecp, unsigned int prefixindex, unsigned long pos) { GtCodetype code; unsigned int idx; GtUchar cc; gt_assert(prefixindex > 0 && prefixindex < ecp->prefixlength); code = ecp->filltable[prefixindex]; for (idx=0; idx<prefixindex; idx++) { gt_assert((unsigned long) (pos + idx) < ecp->totallength); cc = gt_encseq_get_encoded_char_nospecial(ecp->encseq, pos + idx, ecp->readmode); gt_assert(ISNOTSPECIAL(cc)); code += ecp->multimappower[idx][cc]; } return code; }
/*
 * Scans the suffix positions from idx downwards until targetptr[source] is
 * reached. For every position startpos > 0 the preceding character cc is
 * looked up; if cc is not special and its superbucket is not yet marked as
 * sorted, startpos - 1 is written to *targetptr[cc] and that pointer is
 * moved one entry down.
 *
 * targetptr[source] is re-read on every iteration because the loop body may
 * move entries of targetptr.
 */
static void backwardderive(const GtBucketspec2 *bucketspec2,
                           Seqpos **targetptr,
                           unsigned int source,
                           Seqpos *idx)
{
  gt_assert (idx > targetptr[source]);
  while (idx > targetptr[source])
  {
    Seqpos startpos = *idx;

    if (startpos > 0)
    {
      GtUchar cc = getencodedchar(bucketspec2->encseq,startpos-1,
                                  bucketspec2->readmode);

      /* cc is only used as an index after it is known to be non-special */
      if (ISNOTSPECIAL(cc) && !bucketspec2->superbuckettab[cc].sorted)
      {
        *(targetptr[cc]) = startpos - 1;
        targetptr[cc]--;
      }
    }
    idx--;
  }
}
/*
 * Builds a table with one counter per ordinary character (numofchars
 * entries). The table is allocated with gt_malloc and returned; it is not
 * freed here.
 *
 * For every special range of encseq the neighbouring character is fetched
 * and, if it is not special, its counter is incremented:
 *   - if GT_ISDIRREVERSE(readmode) holds: the character at range.end, read
 *     in the converted read mode (GT_READMODE_FORWARD for
 *     GT_READMODE_REVERSE, GT_READMODE_COMPL otherwise);
 *   - otherwise: the character at range.start - 1, read in readmode.
 *
 * In addition one boundary character is counted when it is not adjacent to
 * a special range: position 0 for reverse directions if there is no special
 * prefix, position totallength - 1 otherwise if there is no special suffix.
 */
static GtUword *leftcontextofspecialchardist(unsigned int numofchars,
                                             const GtEncseq *encseq,
                                             GtReadmode readmode)
{
  GtUchar cc;
  unsigned int charidx = 0;
  GtUword *dist, totallength = gt_encseq_total_length(encseq);
  GtReadmode convertedreadmode = (readmode == GT_READMODE_REVERSE)
                                   ? GT_READMODE_FORWARD
                                   : GT_READMODE_COMPL;

  dist = gt_malloc(sizeof (*dist) * numofchars);
  while (charidx < numofchars)
  {
    dist[charidx] = 0;
    charidx++;
  }
  if (gt_encseq_has_specialranges(encseq))
  {
    GtRange range;
    GtSpecialrangeiterator *sri = gt_specialrangeiterator_new(encseq,true);

    if (GT_ISDIRREVERSE(readmode))
    {
      while (gt_specialrangeiterator_next(sri,&range))
      {
        /* a range ending at the sequence end has no neighbour to count */
        if (range.end < totallength)
        {
          cc = gt_encseq_get_encoded_char(encseq,range.end,
                                          convertedreadmode);
          if (ISNOTSPECIAL(cc))
          {
            dist[cc]++;
          }
        }
      }
    } else
    {
      while (gt_specialrangeiterator_next(sri,&range))
      {
        /* a range starting at position 0 has no left neighbour */
        if (range.start > 0)
        {
          cc = gt_encseq_get_encoded_char(encseq,range.start-1,readmode);
          if (ISNOTSPECIAL(cc))
          {
            dist[cc]++;
          }
        }
      }
    }
    gt_specialrangeiterator_delete(sri);
  }
  if (GT_ISDIRREVERSE(readmode))
  {
    if (gt_encseq_lengthofspecialprefix(encseq) == 0)
    {
      cc = gt_encseq_get_encoded_char(encseq,0,convertedreadmode);
      gt_assert(ISNOTSPECIAL(cc));
      dist[cc]++;
    }
  } else
  {
    if (gt_encseq_lengthofspecialsuffix(encseq) == 0)
    {
      cc = gt_encseq_get_encoded_char(encseq,totallength-1,readmode);
      gt_assert(ISNOTSPECIAL(cc));
      dist[cc]++;
    }
  }
  return dist;
}
/*
 * Prints alignment to fp as three lines, processing the edit operations
 * from the last list entry to the first:
 *
 *   line 1: the characters of u, with GAPSYMBOL for every inserted column;
 *   line 2: MATCHSYMBOL where both characters are equal and not special,
 *           MISMATCHSYMBOL for every other column (including gaps);
 *   line 3: the characters of v, with GAPSYMBOL for every deleted column.
 *
 * Sequence characters are translated through the table characters; special
 * characters are shown as wildcardshow instead. alignment must be non-NULL
 * and valid (both asserted).
 */
void gt_alignment_show_with_mapped_chars(const GtAlignment *alignment,
                                         const GtUchar *characters,
                                         GtUchar wildcardshow,
                                         FILE *fp)
{
  GtUword entry, step, upos, vpos, meoplen;
  GtMultieop *meop;

  gt_assert(alignment);
  gt_assert(gt_alignment_is_valid(alignment));

  meoplen = gt_multieoplist_get_length(alignment->eops);

  /* first line: sequence u */
  upos = 0;
  for (entry = meoplen; entry > 0; entry--)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (meop->type)
    {
      case Mismatch:
      case Match:
      case Replacement:
      case Deletion:
        for (step = 0; step < meop->steps; step++)
        {
          gt_xfputc(ISSPECIAL(alignment->u[upos])
                      ? (int) wildcardshow
                      : (int) characters[alignment->u[upos]],
                    fp);
          upos++;
        }
        break;
      case Insertion:
        for (step = 0; step < meop->steps; step++)
        {
          gt_xfputc(GAPSYMBOL, fp);
        }
        break;
    }
  }
  gt_xfputc('\n', fp);

  /* middle line: match/mismatch markers */
  upos = vpos = 0;
  for (entry = meoplen; entry > 0; entry--)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (meop->type)
    {
      case Mismatch:
      case Match:
      case Replacement:
        for (step = 0; step < meop->steps; step++)
        {
          /* equal special characters are shown as a mismatch */
          gt_xfputc((alignment->u[upos] == alignment->v[vpos] &&
                     ISNOTSPECIAL(alignment->u[upos]))
                      ? MATCHSYMBOL
                      : MISMATCHSYMBOL,
                    fp);
          upos++;
          vpos++;
        }
        break;
      case Deletion:
        for (step = 0; step < meop->steps; step++)
        {
          gt_xfputc(MISMATCHSYMBOL, fp);
          upos++;
        }
        break;
      case Insertion:
        for (step = 0; step < meop->steps; step++)
        {
          gt_xfputc(MISMATCHSYMBOL, fp);
          vpos++;
        }
        break;
    }
  }
  gt_xfputc('\n', fp);

  /* last line: sequence v */
  vpos = 0;
  for (entry = meoplen; entry > 0; entry--)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (meop->type)
    {
      case Mismatch:
      case Match:
      case Replacement:
      case Insertion:
        for (step = 0; step < meop->steps; step++)
        {
          gt_xfputc(ISSPECIAL(alignment->v[vpos])
                      ? (int) wildcardshow
                      : (int) characters[alignment->v[vpos]],
                    fp);
          vpos++;
        }
        break;
      case Deletion:
        for (step = 0; step < meop->steps; step++)
        {
          gt_xfputc(GAPSYMBOL, fp);
        }
        break;
    }
  }
  gt_xfputc('\n', fp);
}
static inline void getMatchBound(const BWTSeq *bwtSeq, const Symbol *query, size_t queryLen, struct matchBound *match, bool forward) { const Symbol *qptr, *qend; unsigned int cc; const Mbtab *mbptr; GtPrebwtstate prebwt; gt_assert(bwtSeq && query); if (forward) { qptr = query; qend = query + queryLen; } else { qptr = query + queryLen - 1; qend = query - 1; } gt_assert(ISNOTSPECIAL(*qptr)); cc = (unsigned int) *qptr; prebwt.mbtab = gt_bwtseq2mbtab((const FMindex *) bwtSeq); if (prebwt.mbtab != NULL) { prebwt.numofchars = gt_bwtseq2numofchars((const FMindex *) bwtSeq); prebwt.maxdepth = gt_bwtseq2maxdepth((const FMindex *) bwtSeq); prebwt.code = 0; prebwt.depth = 0; mbptr = gt_prebwt_next(&prebwt,cc); match->start = mbptr->lowerbound; match->end = mbptr->upperbound; } else { prebwt.numofchars = GT_UNDEF_UINT; prebwt.maxdepth = GT_UNDEF_UINT; prebwt.code = 0; prebwt.depth = GT_UNDEF_UINT; match->start = bwtSeq->count[cc]; match->end = bwtSeq->count[cc + 1]; } qptr = forward ? (qptr+1) : (qptr-1); while (match->start < match->end && qptr != qend) { GtUwordPair occPair; gt_assert(ISNOTSPECIAL(*qptr)); cc = (unsigned int) *qptr; if (prebwt.mbtab != NULL && prebwt.depth < prebwt.maxdepth) { mbptr = gt_prebwt_next(&prebwt,cc); match->start = mbptr->lowerbound; match->end = mbptr->upperbound; } else { occPair = BWTSeqTransformedPosPairOcc(bwtSeq, (Symbol) cc, match->start, match->end); match->start = bwtSeq->count[cc] + occPair.a; match->end = bwtSeq->count[cc] + occPair.b; } qptr = forward ? (qptr+1) : (qptr-1); } }