/*
 * Reports an exact self-match of length <len> found at positions <pos1> and
 * <pos2> of <encseq>.  The smaller of the two positions is stored as the
 * database start; the larger one selects the query sequence and is stored
 * relative to the start of that sequence.  <info> is used as the
 * GtQuerymatch to fill and is also forwarded unchanged as first argument of
 * gt_querymatch_output(), whose return value is returned.
 */
static int gt_simpleexactselfmatchoutput(void *info,
                                         const GtEncseq *encseq,
                                         unsigned long len,
                                         unsigned long pos1,
                                         unsigned long pos2,
                                         GT_UNUSED GtError *err)
{
  GtQuerymatch *querymatch = (GtQuerymatch *) info;
  /* order the two positions without modifying the parameters */
  const unsigned long lower = (pos1 <= pos2) ? pos1 : pos2;
  const unsigned long upper = (pos1 <= pos2) ? pos2 : pos1;
  /* the query side is the sequence containing the larger position */
  const unsigned long queryseqnum = gt_encseq_seqnum(encseq, upper);
  const unsigned long seqstartpos
    = gt_encseq_seqstartpos(encseq, queryseqnum);
  const unsigned long seqlength
    = gt_encseq_seqlength(encseq, queryseqnum);

  gt_assert(upper >= seqstartpos);
  gt_querymatch_fill(querymatch,
                     len,                     /* dblen */
                     lower,                   /* dbstart */
                     GT_READMODE_FORWARD,
                     false,                   /* query_as_reversecopy */
                     0,                       /* score */
                     0,                       /* edist */
                     true,                    /* selfmatch */
                     (uint64_t) queryseqnum,
                     len,                     /* querylen */
                     upper - seqstartpos);    /* querystart in sequence */
  return gt_querymatch_output(info, encseq, querymatch, NULL, seqlength, err);
}
int gt_querymatch_fill_and_output( GtUword dblen, GtUword dbstart, GtReadmode readmode, bool query_as_reversecopy, GtWord score, GtUword edist, bool selfmatch, uint64_t queryseqnum, GtUword querylen, GtUword querystart, GtQuerymatchoutoptions *querymatchoutoptions, const GtEncseq *encseq, const GtUchar *query, GtUword query_totallength, GtError *err) { GtQuerymatch querymatch; gt_querymatch_fill(&querymatch, dblen, dbstart, readmode, query_as_reversecopy, score, edist, selfmatch, queryseqnum, querylen, querystart); return gt_querymatch_output(querymatchoutoptions, encseq, &querymatch, query, query_totallength, err); }
/*
 * Reports an exact self-match of length <len> found at positions <pos1> and
 * <pos2>.  <genericencseq> must be non-NULL and carry an encoded sequence
 * (both are asserted).  The smaller position becomes the database start;
 * the larger one selects the query sequence and is stored relative to the
 * start of that sequence.  <info> is used as the GtQuerymatch to fill and is
 * also forwarded unchanged as first argument of gt_querymatch_output(),
 * whose return value is returned.
 */
static int gt_simpleexactselfmatchoutput(void *info,
                                         const GtGenericEncseq *genericencseq,
                                         GtUword len,
                                         GtUword pos1,
                                         GtUword pos2,
                                         GT_UNUSED GtError *err)
{
  GtQuerymatch *querymatch = (GtQuerymatch *) info;
  const GtEncseq *encseq;
  GtUword lower, upper, queryseqnum, seqstartpos, seqlength;

  gt_assert(genericencseq != NULL && genericencseq->hasencseq);
  encseq = genericencseq->seqptr.encseq;

  /* order the two positions: lower is the db side, upper the query side */
  if (pos1 <= pos2)
  {
    lower = pos1;
    upper = pos2;
  } else
  {
    lower = pos2;
    upper = pos1;
  }

  queryseqnum = gt_encseq_seqnum(encseq, upper);
  seqstartpos = gt_encseq_seqstartpos(encseq, queryseqnum);
  seqlength = gt_encseq_seqlength(encseq, queryseqnum);
  gt_assert(upper >= seqstartpos);

  gt_querymatch_fill(querymatch,
                     len,                     /* dblen */
                     lower,                   /* dbstart */
                     GT_READMODE_FORWARD,
                     false,                   /* query_as_reversecopy */
                     0,                       /* score */
                     0,                       /* edist */
                     true,                    /* selfmatch */
                     (uint64_t) queryseqnum,
                     len,                     /* querylen */
                     upper - seqstartpos);    /* querystart in sequence */
  return gt_querymatch_output(info, encseq, querymatch, NULL, seqlength, err);
}
static int gt_simplexdropselfmatchoutput(void *info, const GtGenericEncseq *genericencseq, GtUword len, GtUword pos1, GtUword pos2, GtError *err) { GtXdropmatchinfo *xdropmatchinfo = (GtXdropmatchinfo *) info; GtXdropscore score; GtUword dbseqnum, dbseqstartpos, dbseqlength, dbstart, dblen, querystart, queryseqnum, querylen, queryseqlength, queryseqstartpos, dbtotallength; const GtEncseq *encseq; gt_assert(genericencseq != NULL && genericencseq->hasencseq); encseq = genericencseq->seqptr.encseq; dbtotallength = gt_encseq_total_length(encseq); if (pos1 > pos2) { GtUword tmp = pos1; pos1 = pos2; pos2 = tmp; } dbseqnum = gt_encseq_seqnum(encseq,pos1), dbseqstartpos = gt_encseq_seqstartpos(encseq,dbseqnum), dbseqlength = gt_encseq_seqlength(encseq,dbseqnum); if (pos2 < dbseqstartpos + dbseqlength) { queryseqnum = dbseqnum; queryseqstartpos = dbseqstartpos; queryseqlength = dbseqlength; } else { queryseqnum = gt_encseq_seqnum(encseq,pos2); gt_assert(dbseqnum < queryseqnum); queryseqstartpos = gt_encseq_seqstartpos(encseq,queryseqnum); queryseqlength = gt_encseq_seqlength(encseq,queryseqnum); } if (pos1 > 0 && pos2 > 0) { gt_assert(pos1 >= dbseqstartpos && pos2 >= queryseqstartpos); gt_seqabstract_reinit_encseq(xdropmatchinfo->useq,encseq, pos1 - dbseqstartpos,0); gt_seqabstract_reinit_encseq(xdropmatchinfo->vseq,encseq, pos2 - queryseqstartpos,0); gt_evalxdroparbitscoresextend(false, &xdropmatchinfo->best_left, xdropmatchinfo->res, xdropmatchinfo->useq, xdropmatchinfo->vseq, pos1, pos2, xdropmatchinfo->belowscore); } else { xdropmatchinfo->best_left.ivalue = 0; xdropmatchinfo->best_left.jvalue = 0; xdropmatchinfo->best_left.score = 0; } if (pos1 + len < dbtotallength && pos2 + len < dbtotallength) { const GtUword seqend1 = dbseqstartpos + dbseqlength; const GtUword seqend2 = queryseqstartpos + queryseqlength; gt_assert(seqend1 >= pos1 + len && seqend2 >= pos2 + len); gt_seqabstract_reinit_encseq(xdropmatchinfo->useq, encseq,seqend1 - (pos1 + len),0); 
gt_seqabstract_reinit_encseq(xdropmatchinfo->vseq, encseq,seqend2 - (pos2 + len),0); gt_evalxdroparbitscoresextend(true, &xdropmatchinfo->best_right, xdropmatchinfo->res, xdropmatchinfo->useq, xdropmatchinfo->vseq, pos1 + len, pos2 + len, xdropmatchinfo->belowscore); } else { xdropmatchinfo->best_right.ivalue = 0; xdropmatchinfo->best_right.jvalue = 0; xdropmatchinfo->best_right.score = 0; } gt_assert(pos1 >= (GtUword) xdropmatchinfo->best_left.ivalue && pos2 >= (GtUword) xdropmatchinfo->best_left.jvalue); querystart = pos2 - xdropmatchinfo->best_left.jvalue; gt_assert(querystart >= queryseqstartpos); dblen = len + xdropmatchinfo->best_left.ivalue + xdropmatchinfo->best_right.ivalue; dbstart = pos1 - xdropmatchinfo->best_left.ivalue; querylen = len + xdropmatchinfo->best_left.jvalue + xdropmatchinfo->best_right.jvalue, score = (GtXdropscore) len * xdropmatchinfo->arbitscores.mat + xdropmatchinfo->best_left.score + xdropmatchinfo->best_right.score; gt_seqabstract_reinit_encseq(xdropmatchinfo->useq, encseq, dblen, dbstart); gt_seqabstract_reinit_encseq(xdropmatchinfo->vseq, encseq, querylen, querystart); gt_querymatch_fill(xdropmatchinfo->querymatchspaceptr, dblen, dbstart, GT_READMODE_FORWARD, false, score, greedyunitedist(xdropmatchinfo->frontresource, xdropmatchinfo->useq,xdropmatchinfo->vseq), true, (uint64_t) queryseqnum, querylen, querystart - queryseqstartpos); return gt_querymatch_output(info, encseq, xdropmatchinfo->querymatchspaceptr, NULL, gt_encseq_seqlength(encseq, queryseqnum), err); }
static int gt_processxdropquerymatches(void *info, const GtEncseq *encseq, const GtQuerymatch *querymatch, const GtUchar *query, GtUword query_totallength, GtError *err) { GtXdropmatchinfo *xdropmatchinfo = (GtXdropmatchinfo *) info; GtXdropscore score; GtUword querystart, dblen, dbstart, querylen; GtUword pos1 = gt_querymatch_dbstart(querymatch); GtUword pos2 = gt_querymatch_querystart(querymatch); GtUword len = gt_querymatch_querylen(querymatch); const GtUword dbtotallength = gt_encseq_total_length(encseq); uint64_t queryseqnum; GtUword dbseqnum, dbseqstartpos, dbseqlength; dbseqnum = gt_encseq_seqnum(encseq,pos1); dbseqstartpos = gt_encseq_seqstartpos(encseq,dbseqnum); dbseqlength = gt_encseq_seqlength(encseq,dbseqnum); if (pos1 > 0 && pos2 > 0) { gt_assert(dbseqstartpos < pos1); gt_seqabstract_reinit_encseq(xdropmatchinfo->useq,encseq, pos1 - dbseqstartpos,0); gt_seqabstract_reinit_gtuchar(xdropmatchinfo->vseq, query, pos2, 0); gt_evalxdroparbitscoresextend(false, &xdropmatchinfo->best_left, xdropmatchinfo->res, xdropmatchinfo->useq, xdropmatchinfo->vseq, pos1, pos2, xdropmatchinfo->belowscore); } else { xdropmatchinfo->best_left.ivalue = 0; xdropmatchinfo->best_left.jvalue = 0; xdropmatchinfo->best_left.score = 0; } if (pos1 + len < dbtotallength && pos2 + len < query_totallength) { gt_seqabstract_reinit_encseq(xdropmatchinfo->useq, encseq,dbseqstartpos + dbseqlength - (pos1 + len),0); gt_seqabstract_reinit_gtuchar(xdropmatchinfo->vseq, query,query_totallength - (pos2 + len), 0); gt_evalxdroparbitscoresextend(true, &xdropmatchinfo->best_right, xdropmatchinfo->res, xdropmatchinfo->useq, xdropmatchinfo->vseq, pos1 + len, pos2 + len, xdropmatchinfo->belowscore); } else { xdropmatchinfo->best_right.ivalue = 0; xdropmatchinfo->best_right.jvalue = 0; xdropmatchinfo->best_right.score = 0; } gt_assert(pos1 >= (GtUword) xdropmatchinfo->best_left.ivalue && pos2 >= (GtUword) xdropmatchinfo->best_left.jvalue); querystart = pos2 - xdropmatchinfo->best_left.jvalue; 
queryseqnum = gt_querymatch_queryseqnum(querymatch); dblen = len + xdropmatchinfo->best_left.ivalue + xdropmatchinfo->best_right.ivalue; dbstart = pos1 - xdropmatchinfo->best_left.ivalue; querylen = len + xdropmatchinfo->best_left.jvalue + xdropmatchinfo->best_right.jvalue, score = (GtXdropscore) len * xdropmatchinfo->arbitscores.mat + xdropmatchinfo->best_left.score + xdropmatchinfo->best_right.score; gt_seqabstract_reinit_encseq(xdropmatchinfo->useq, encseq, dblen, dbstart); gt_seqabstract_reinit_gtuchar(xdropmatchinfo->vseq, query, querylen, querystart); gt_querymatch_fill(xdropmatchinfo->querymatchspaceptr, dblen, dbstart, GT_READMODE_FORWARD, false, score, greedyunitedist(xdropmatchinfo->frontresource, xdropmatchinfo->useq,xdropmatchinfo->vseq), false, queryseqnum, querylen, querystart); return gt_querymatch_output(info, encseq, xdropmatchinfo->querymatchspaceptr, query, query_totallength, err); }
static int gt_querysubstringmatch_generic( bool selfmatch, const GtEncseq *dbencseq, const ESASuffixptr *suftabpart, GtReadmode readmode, unsigned long numberofsuffixes, uint64_t queryunitnum, const GtQueryrep *queryrep, unsigned long minmatchlength, GtProcessquerymatch processquerymatch, void *processquerymatchinfo, GtQuerymatch *querymatchspaceptr, GtError *err) { GtMMsearchiterator *mmsi; unsigned long totallength, localqueryoffset = 0; uint64_t localqueryunitnum = queryunitnum; GtQuerysubstring querysubstring; bool haserr = false; gt_assert(numberofsuffixes > 0); totallength = gt_encseq_total_length(dbencseq); querysubstring.queryrep = queryrep; for (querysubstring.offset = 0; querysubstring.offset <= queryrep->length - minmatchlength; querysubstring.offset++) { unsigned long dbstart; mmsi = gt_mmsearchiterator_new_generic(dbencseq, suftabpart, 0, /* leftbound */ numberofsuffixes-1, /* rightbound */ 0, /* offset */ readmode, &querysubstring, minmatchlength); while (!haserr && gt_mmsearchiterator_next(&dbstart,mmsi)) { if (gt_mmsearch_isleftmaximal(dbencseq, readmode, dbstart, &querysubstring)) { unsigned long extend = gt_mmsearch_extendright(dbencseq, mmsi->esr, readmode, totallength, dbstart + minmatchlength, &querysubstring, minmatchlength); gt_querymatch_fill(querymatchspaceptr, minmatchlength + extend, dbstart, queryrep->readmode, queryrep->reversecopy, 0, /* score */ 0, /* edist */ selfmatch, localqueryunitnum, minmatchlength + extend, localqueryoffset); if (processquerymatch(processquerymatchinfo, dbencseq, querymatchspaceptr, queryrep->sequence, queryrep->length, err) != 0) { haserr = true; } } } gt_mmsearchiterator_delete(mmsi); mmsi = NULL; if (!haserr) { if (gt_mmsearch_accessquery(queryrep,querysubstring.offset) == (GtUchar) SEPARATOR) { localqueryunitnum++; localqueryoffset = 0; } else { localqueryoffset++; } } } return haserr ? -1 : 0; }
/*
 * For every start offset in the query described by <queryrep>, calls
 * gt_suffixarrayfindmums() on <suffixarray> for the query suffix beginning
 * at that offset.  If a database position was found (dbstart != ULONG_MAX),
 * the match has at least <minmatchlength> symbols and
 * gt_mum_isleftmaximal() holds, the match is stored in <querymatchspaceptr>
 * and handed to <processquerymatch> together with <processquerymatchinfo>.
 *
 * While walking over the query, the query unit number (starting at
 * <queryunitnum>) is incremented and the unit-relative offset is reset each
 * time a SEPARATOR symbol is passed.
 *
 * Preconditions (asserted): <selfmatch> is false and the query has at least
 * <minmatchlength> symbols.
 *
 * Returns 0 on success and -1 as soon as <processquerymatch> returns a
 * non-zero value; the callback is not invoked again after it has failed.
 */
static int gt_queryuniquematch(bool selfmatch,
                               const Suffixarray *suffixarray,
                               uint64_t queryunitnum,
                               const GtQueryrep *queryrep,
                               unsigned long minmatchlength,
                               GtProcessquerymatch processquerymatch,
                               void *processquerymatchinfo,
                               GtQuerymatch *querymatchspaceptr,
                               GtError *err)
{
  unsigned long offset,
                totallength = gt_encseq_total_length(suffixarray->encseq),
                localqueryoffset = 0;
  uint64_t localqueryunitnum = queryunitnum;
  bool haserr = false;

  gt_assert(!selfmatch && queryrep->length >= minmatchlength);
  /* Stop at the first error: continuing would keep searching and could call
     the callback again although <err> has already been handed to a failed
     call. */
  for (offset = 0;
       !haserr && offset <= queryrep->length - minmatchlength;
       offset++)
  {
    unsigned long matchlen, dbstart;

    matchlen = gt_suffixarrayfindmums(suffixarray,
                                      0,
                                      0, /* leftbound */
                                      totallength, /* rightbound */
                                      &dbstart,
                                      queryrep->sequence + offset,
                                      queryrep->sequence + queryrep->length);
    if (dbstart != ULONG_MAX &&
        matchlen >= minmatchlength &&
        gt_mum_isleftmaximal(suffixarray->encseq,
                             suffixarray->readmode,
                             dbstart,
                             offset,
                             queryrep->sequence))
    {
      gt_querymatch_fill(querymatchspaceptr,
                         matchlen,
                         dbstart,
                         queryrep->readmode,
                         queryrep->reversecopy,
                         0, /* score */
                         0, /* edist */
                         selfmatch,
                         localqueryunitnum,
                         matchlen,
                         localqueryoffset);
      if (processquerymatch(processquerymatchinfo,
                            suffixarray->encseq,
                            querymatchspaceptr,
                            queryrep->sequence,
                            queryrep->length,
                            err) != 0)
      {
        haserr = true;
      }
    }
    if (!haserr)
    {
      /* keep unit number and unit-relative offset in sync with the
         absolute offset */
      if (queryrep->sequence[offset] == (GtUchar) SEPARATOR)
      {
        localqueryunitnum++;
        localqueryoffset = 0;
      } else
      {
        localqueryoffset++;
      }
    }
  }
  return haserr ? -1 : 0;
}