static void gt_querysubstringmatch(bool selfmatch, const GtEncseq *dbencseq, const ESASuffixptr *suftabpart, GtReadmode readmode, GtUword numberofsuffixes, uint64_t queryunitnum, GtQueryrepresentation *queryrep, GtUword minmatchlength, GtProcessquerymatch processquerymatch, void *processquerymatchinfo, GtQuerymatch *querymatchspaceptr) { GtMMsearchiterator *mmsi; GtUword totallength, localqueryoffset = 0; uint64_t localqueryunitnum = queryunitnum; GtQuerysubstring querysubstring; gt_assert(numberofsuffixes > 0); totallength = gt_encseq_total_length(dbencseq); querysubstring.queryrep = queryrep; for (querysubstring.currentoffset = 0; querysubstring.currentoffset <= queryrep->seqlen - minmatchlength; querysubstring.currentoffset++) { GtUword dbstart; mmsi = gt_mmsearchiterator_new(dbencseq, suftabpart, 0, /* leftbound */ numberofsuffixes - 1, /* rightbound */ 0, /* offset */ readmode, &querysubstring, minmatchlength); while (gt_mmsearchiterator_next(&dbstart,mmsi)) { if (gt_mmsearch_isleftmaximal(dbencseq, readmode, dbstart, &querysubstring)) { GtUword dbseqnum, dbseqstartpos, dbseqlen, extend; extend = gt_mmsearch_extendright(dbencseq, mmsi->esr, readmode, totallength, dbstart + minmatchlength, &querysubstring, minmatchlength); if (gt_encseq_has_multiseq_support(dbencseq)) { dbseqnum = gt_encseq_seqnum(dbencseq,dbstart); dbseqstartpos = gt_encseq_seqstartpos(dbencseq,dbseqnum); dbseqlen = gt_encseq_seqlength(dbencseq,dbseqnum); } else { dbseqnum = dbseqstartpos = dbseqlen = 0; } gt_querymatch_init(querymatchspaceptr, minmatchlength + extend, dbstart, dbseqnum, dbstart - dbseqstartpos, dbseqlen, 0, /* score */ 0, /* edist */ selfmatch, localqueryunitnum, minmatchlength + extend, localqueryoffset, queryrep->seqlen); processquerymatch(processquerymatchinfo,querymatchspaceptr); } } gt_mmsearchiterator_delete(mmsi); mmsi = NULL; if (gt_mmsearch_accessquery(queryrep,querysubstring.currentoffset) == (GtUchar) SEPARATOR) { localqueryunitnum++; localqueryoffset = 0; } else { 
localqueryoffset++; } } }
/*
  Advance <qsmi> to the next match.

  Returns 0 if a match was found; in this case qsmi->dbstart holds the
  position delivered by the search iterator and qsmi->matchlength the
  length of the match after extension to the right. Returns 1 if there are
  no more query sequences, and -1 if reading the next query sequence from
  the sequence iterator failed (<err> is passed on to that iterator).
*/
int gt_querysubstringmatchiterator_next(GtQuerysubstringmatchiterator *qsmi,
                                        GtError *err)
{
  gt_assert(qsmi != NULL);
  for (;;)
  {
    /* A stored query length below the threshold signals that the next
       query sequence has to be fetched. */
    if (qsmi->query_seqlen < qsmi->userdefinedleastlength)
    {
      if (qsmi->seqit == NULL)
      {
        /* queries are taken from the encoded query sequence */
        if (qsmi->queryunitnum == qsmi->query_encseq_numofsequences)
        {
          return 1;
        }
        qsmi->queryrep.startpos
          = gt_encseq_seqstartpos(qsmi->queryrep.encseq, qsmi->queryunitnum);
        qsmi->query_seqlen
          = gt_encseq_seqlength(qsmi->queryrep.encseq, qsmi->queryunitnum);
      } else
      {
        /* queries are delivered by the sequence iterator */
        const int status = gt_seq_iterator_next(qsmi->seqit,
                                                &qsmi->query_for_seqit,
                                                &qsmi->query_seqlen,
                                                &qsmi->desc,
                                                err);

        if (status < 0)
        {
          return -1; /* error */
        }
        if (status == 0)
        {
          return 1; /* no more sequences */
        }
        gt_assert(qsmi->query_seqlen > 0 && qsmi->query_for_seqit != NULL);
        qsmi->queryrep.sequence = qsmi->query_for_seqit;
      }
      gt_assert(qsmi->query_seqlen > 0);
      qsmi->queryrep.seqlen = qsmi->query_seqlen;
      qsmi->querysubstring.currentoffset = 0;
    }

    if (qsmi->query_seqlen < qsmi->userdefinedleastlength)
    {
      /* the sequence just fetched is too short: skip it */
      qsmi->query_seqlen = 0;
      qsmi->queryunitnum++;
      continue;
    }

    if (!qsmi->mmsi_defined)
    {
      /* set up the search for the current query offset; the hits are
         consumed in the following rounds of the loop */
      gt_mmsearchiterator_reinit(qsmi->mmsi,
                                 qsmi->dbencseq,
                                 qsmi->suftabpart,
                                 0, /* l */
                                 qsmi->numberofsuffixes - 1, /* r */
                                 0, /* offset */
                                 qsmi->db_readmode,
                                 &qsmi->querysubstring,
                                 qsmi->userdefinedleastlength);
      qsmi->mmsi_defined = true;
      continue;
    }

    if (gt_mmsearchiterator_next(&qsmi->dbstart, qsmi->mmsi))
    {
      GtUword rightextension;

      if (!gt_mmsearch_isleftmaximal(qsmi->dbencseq,
                                     qsmi->db_readmode,
                                     qsmi->dbstart,
                                     &qsmi->querysubstring))
      {
        continue; /* not reported, try the next hit */
      }
      rightextension
        = gt_mmsearch_extendright(qsmi->dbencseq,
                                  qsmi->mmsi->esr,
                                  qsmi->db_readmode,
                                  qsmi->totallength,
                                  qsmi->dbstart + qsmi->userdefinedleastlength,
                                  &qsmi->querysubstring,
                                  qsmi->userdefinedleastlength);
      qsmi->matchlength = qsmi->userdefinedleastlength + rightextension;
      return 0;
    }

    /* No more hits for the current offset: move on to the next offset or,
       if the remaining suffix would be too short, to the next query. */
    qsmi->mmsi_defined = false;
    if (qsmi->querysubstring.currentoffset + qsmi->userdefinedleastlength
          < qsmi->query_seqlen)
    {
      qsmi->querysubstring.currentoffset++;
    } else
    {
      qsmi->query_seqlen = 0;
      qsmi->queryunitnum++;
    }
  }
}
static int gt_querysubstringmatch_generic( bool selfmatch, const GtEncseq *dbencseq, const ESASuffixptr *suftabpart, GtReadmode readmode, unsigned long numberofsuffixes, uint64_t queryunitnum, const GtQueryrep *queryrep, unsigned long minmatchlength, GtProcessquerymatch processquerymatch, void *processquerymatchinfo, GtQuerymatch *querymatchspaceptr, GtError *err) { GtMMsearchiterator *mmsi; unsigned long totallength, localqueryoffset = 0; uint64_t localqueryunitnum = queryunitnum; GtQuerysubstring querysubstring; bool haserr = false; gt_assert(numberofsuffixes > 0); totallength = gt_encseq_total_length(dbencseq); querysubstring.queryrep = queryrep; for (querysubstring.offset = 0; querysubstring.offset <= queryrep->length - minmatchlength; querysubstring.offset++) { unsigned long dbstart; mmsi = gt_mmsearchiterator_new_generic(dbencseq, suftabpart, 0, /* leftbound */ numberofsuffixes-1, /* rightbound */ 0, /* offset */ readmode, &querysubstring, minmatchlength); while (!haserr && gt_mmsearchiterator_next(&dbstart,mmsi)) { if (gt_mmsearch_isleftmaximal(dbencseq, readmode, dbstart, &querysubstring)) { unsigned long extend = gt_mmsearch_extendright(dbencseq, mmsi->esr, readmode, totallength, dbstart + minmatchlength, &querysubstring, minmatchlength); gt_querymatch_fill(querymatchspaceptr, minmatchlength + extend, dbstart, queryrep->readmode, queryrep->reversecopy, 0, /* score */ 0, /* edist */ selfmatch, localqueryunitnum, minmatchlength + extend, localqueryoffset); if (processquerymatch(processquerymatchinfo, dbencseq, querymatchspaceptr, queryrep->sequence, queryrep->length, err) != 0) { haserr = true; } } } gt_mmsearchiterator_delete(mmsi); mmsi = NULL; if (!haserr) { if (gt_mmsearch_accessquery(queryrep,querysubstring.offset) == (GtUchar) SEPARATOR) { localqueryunitnum++; localqueryoffset = 0; } else { localqueryoffset++; } } } return haserr ? -1 : 0; }