/*
  Count how many further positions the database sequence (read from <dbend>
  onwards via <esr>) and the query (read from currentoffset + <matchlength>
  onwards) agree on. Counting stops at the first special database character,
  at the first mismatch, or when either the database position reaches
  <totallength> or the query position reaches the query length. The return
  value is the number of agreeing positions.
*/
static GtUword gt_mmsearch_extendright(const GtEncseq *dbencseq,
                                       GtEncseqReader *esr,
                                       GtReadmode readmode,
                                       GtUword totallength,
                                       GtUword dbend,
                                       const GtQuerysubstring *querysubstring,
                                       GtUword matchlength)
{
  GtUword extension = 0,
          querypos = querysubstring->currentoffset + matchlength;

  /* the reader is only repositioned if there is something left to read */
  if (dbend < totallength)
  {
    gt_encseq_reader_reinit_with_readmode(esr,dbencseq,readmode,dbend);
  }
  while (dbend + extension < totallength &&
         querypos < querysubstring->queryrep->seqlen)
  {
    const GtUchar dbchar = gt_encseq_reader_next_encoded_char(esr);

    if (ISSPECIAL(dbchar) ||
        dbchar != gt_mmsearch_accessquery(querysubstring->queryrep,querypos))
    {
      break;
    }
    extension++;
    querypos++;
  }
  return extension;
}
/*
  Decide whether a match starting at database position <dbstart> and at the
  current offset of <querysubstring> cannot be extended by one character to
  the left. This holds if either start is 0, if the database character left
  of the match is special, or if that character differs from the query
  character left of the current offset.
*/
static bool gt_mmsearch_isleftmaximal(const GtEncseq *dbencseq,
                                      GtReadmode readmode,
                                      GtUword dbstart,
                                      const GtQuerysubstring *querysubstring)
{
  if (dbstart != 0 && querysubstring->currentoffset != 0)
  {
    /* single random access to the character preceding the match */
    const GtUchar leftofmatch = gt_encseq_get_encoded_char(dbencseq,
                                                           dbstart - 1,
                                                           readmode);

    return ISSPECIAL(leftofmatch) ||
           leftofmatch != gt_mmsearch_accessquery(
                                         querysubstring->queryrep,
                                         querysubstring->currentoffset - 1);
  }
  /* nothing to the left in the database or in the query */
  return true;
}
static void gt_querysubstringmatch(bool selfmatch, const GtEncseq *dbencseq, const ESASuffixptr *suftabpart, GtReadmode readmode, GtUword numberofsuffixes, uint64_t queryunitnum, GtQueryrepresentation *queryrep, GtUword minmatchlength, GtProcessquerymatch processquerymatch, void *processquerymatchinfo, GtQuerymatch *querymatchspaceptr) { GtMMsearchiterator *mmsi; GtUword totallength, localqueryoffset = 0; uint64_t localqueryunitnum = queryunitnum; GtQuerysubstring querysubstring; gt_assert(numberofsuffixes > 0); totallength = gt_encseq_total_length(dbencseq); querysubstring.queryrep = queryrep; for (querysubstring.currentoffset = 0; querysubstring.currentoffset <= queryrep->seqlen - minmatchlength; querysubstring.currentoffset++) { GtUword dbstart; mmsi = gt_mmsearchiterator_new(dbencseq, suftabpart, 0, /* leftbound */ numberofsuffixes - 1, /* rightbound */ 0, /* offset */ readmode, &querysubstring, minmatchlength); while (gt_mmsearchiterator_next(&dbstart,mmsi)) { if (gt_mmsearch_isleftmaximal(dbencseq, readmode, dbstart, &querysubstring)) { GtUword dbseqnum, dbseqstartpos, dbseqlen, extend; extend = gt_mmsearch_extendright(dbencseq, mmsi->esr, readmode, totallength, dbstart + minmatchlength, &querysubstring, minmatchlength); if (gt_encseq_has_multiseq_support(dbencseq)) { dbseqnum = gt_encseq_seqnum(dbencseq,dbstart); dbseqstartpos = gt_encseq_seqstartpos(dbencseq,dbseqnum); dbseqlen = gt_encseq_seqlength(dbencseq,dbseqnum); } else { dbseqnum = dbseqstartpos = dbseqlen = 0; } gt_querymatch_init(querymatchspaceptr, minmatchlength + extend, dbstart, dbseqnum, dbstart - dbseqstartpos, dbseqlen, 0, /* score */ 0, /* edist */ selfmatch, localqueryunitnum, minmatchlength + extend, localqueryoffset, queryrep->seqlen); processquerymatch(processquerymatchinfo,querymatchspaceptr); } } gt_mmsearchiterator_delete(mmsi); mmsi = NULL; if (gt_mmsearch_accessquery(queryrep,querysubstring.currentoffset) == (GtUchar) SEPARATOR) { localqueryunitnum++; localqueryoffset = 0; } else { 
localqueryoffset++; } } }
static int gt_querysubstringmatch_generic( bool selfmatch, const GtEncseq *dbencseq, const ESASuffixptr *suftabpart, GtReadmode readmode, unsigned long numberofsuffixes, uint64_t queryunitnum, const GtQueryrep *queryrep, unsigned long minmatchlength, GtProcessquerymatch processquerymatch, void *processquerymatchinfo, GtQuerymatch *querymatchspaceptr, GtError *err) { GtMMsearchiterator *mmsi; unsigned long totallength, localqueryoffset = 0; uint64_t localqueryunitnum = queryunitnum; GtQuerysubstring querysubstring; bool haserr = false; gt_assert(numberofsuffixes > 0); totallength = gt_encseq_total_length(dbencseq); querysubstring.queryrep = queryrep; for (querysubstring.offset = 0; querysubstring.offset <= queryrep->length - minmatchlength; querysubstring.offset++) { unsigned long dbstart; mmsi = gt_mmsearchiterator_new_generic(dbencseq, suftabpart, 0, /* leftbound */ numberofsuffixes-1, /* rightbound */ 0, /* offset */ readmode, &querysubstring, minmatchlength); while (!haserr && gt_mmsearchiterator_next(&dbstart,mmsi)) { if (gt_mmsearch_isleftmaximal(dbencseq, readmode, dbstart, &querysubstring)) { unsigned long extend = gt_mmsearch_extendright(dbencseq, mmsi->esr, readmode, totallength, dbstart + minmatchlength, &querysubstring, minmatchlength); gt_querymatch_fill(querymatchspaceptr, minmatchlength + extend, dbstart, queryrep->readmode, queryrep->reversecopy, 0, /* score */ 0, /* edist */ selfmatch, localqueryunitnum, minmatchlength + extend, localqueryoffset); if (processquerymatch(processquerymatchinfo, dbencseq, querymatchspaceptr, queryrep->sequence, queryrep->length, err) != 0) { haserr = true; } } } gt_mmsearchiterator_delete(mmsi); mmsi = NULL; if (!haserr) { if (gt_mmsearch_accessquery(queryrep,querysubstring.offset) == (GtUchar) SEPARATOR) { localqueryunitnum++; localqueryoffset = 0; } else { localqueryoffset++; } } } return haserr ? -1 : 0; }