/*
  Match callback that reports suffix-prefix overlaps between sequences.

  <pos1> and <pos2> are absolute positions of the two occurrences of a match
  of length <matchlen> in the encoded sequence held by <genericencseq> (which
  must carry an encseq). The positions are ordered so that the smaller one is
  handled first; both are then translated into (sequence number, relative
  position) pairs.

  A line "<suffix-seqnum> <prefix-seqnum> <matchlen>" is printed to stdout if
  one occurrence starts at relative position 0 of its sequence and the other
  occurrence ends exactly at the end of its sequence. When the occurrence at
  the smaller position starts a sequence, only that orientation is tested.

  <info> and <err> are not used. Always returns 0.
*/
static int gt_simplesuffixprefixmatchoutput(GT_UNUSED void *info,
                                            const GtGenericEncseq
                                              *genericencseq,
                                            GtUword matchlen,
                                            GtUword pos1,
                                            GtUword pos2,
                                            GT_UNUSED GtError *err)
{
  const GtEncseq *encseq;
  GtUword lowpos = pos1, highpos = pos2,
          lowseqnum, highseqnum,
          lowseqstart, highseqstart,
          lowrelpos, highrelpos;

  /* order the two occurrences by absolute position */
  if (lowpos > highpos)
  {
    lowpos = pos2;
    highpos = pos1;
  }
  gt_assert(genericencseq != NULL && genericencseq->hasencseq);
  encseq = genericencseq->seqptr.encseq;

  /* occurrence at the smaller position */
  lowseqnum = gt_encseq_seqnum(encseq, lowpos);
  lowseqstart = gt_encseq_seqstartpos(encseq, lowseqnum);
  gt_assert(lowseqstart <= lowpos);
  lowrelpos = lowpos - lowseqstart;

  /* occurrence at the larger position */
  highseqnum = gt_encseq_seqnum(encseq, highpos);
  highseqstart = gt_encseq_seqstartpos(encseq, highseqnum);
  gt_assert(highseqstart <= highpos);
  highrelpos = highpos - highseqstart;

  if (lowrelpos == 0)
  {
    /* lower occurrence is a prefix: is the higher one a suffix? */
    if (highrelpos + matchlen == gt_encseq_seqlength(encseq, highseqnum))
    {
      printf(""GT_WU" "GT_WU" "GT_WU"\n", highseqnum, lowseqnum, matchlen);
    }
  } else if (highrelpos == 0)
  {
    /* higher occurrence is a prefix: is the lower one a suffix? */
    if (lowrelpos + matchlen == gt_encseq_seqlength(encseq, lowseqnum))
    {
      printf(""GT_WU" "GT_WU" "GT_WU"\n", lowseqnum, highseqnum, matchlen);
    }
  }
  return 0;
}
/*
  Match callback for exact self matches.

  <info> is interpreted as a GtQuerymatch that is filled and then output.
  The smaller of <pos1>/<pos2> becomes the database start position, the
  larger one is treated as the query occurrence: it is translated into the
  number of the sequence containing it and a position relative to the start
  of that sequence. Both database and query length are <len>.

  Returns the result of gt_querymatch_output().
*/
static int gt_simpleexactselfmatchoutput(void *info,
                                         const GtEncseq *encseq,
                                         unsigned long len,
                                         unsigned long pos1,
                                         unsigned long pos2,
                                         GT_UNUSED GtError *err)
{
  GtQuerymatch *querymatch = (GtQuerymatch *) info;
  unsigned long dbpos = pos1,
                querypos = pos2,
                queryseqnum,
                queryseqstart,
                queryseqlength;

  if (dbpos > querypos)
  {
    dbpos = pos2;
    querypos = pos1;
  }
  queryseqnum = gt_encseq_seqnum(encseq, querypos);
  queryseqstart = gt_encseq_seqstartpos(encseq, queryseqnum);
  queryseqlength = gt_encseq_seqlength(encseq, queryseqnum);
  gt_assert(querypos >= queryseqstart);
  gt_querymatch_fill(querymatch,
                     len,                      /* database length */
                     dbpos,                    /* database start */
                     GT_READMODE_FORWARD,
                     false,
                     0,
                     0,
                     true,
                     (uint64_t) queryseqnum,
                     len,                      /* query length */
                     querypos - queryseqstart  /* relative query start */);
  return gt_querymatch_output(info,
                              encseq,
                              querymatch,
                              NULL,
                              queryseqlength,
                              err);
}
static void showmatch(void *processinfo,const GtIdxMatch *match) { Showmatchinfo *showmatchinfo = (Showmatchinfo *) processinfo; unsigned long seqnum; unsigned long relpos; if (match->dbabsolute) { unsigned long seqstartpos; seqnum = gt_encseq_seqnum(showmatchinfo->encseq, match->dbstartpos); seqstartpos = gt_encseq_seqstartpos(showmatchinfo->encseq, seqnum); gt_assert(seqstartpos <= match->dbstartpos); relpos = match->dbstartpos - seqstartpos; } else { relpos = match->dbstartpos; seqnum = match->dbseqnum; } printf("%lu\t%lu\t",seqnum,relpos); printf("%lu\t",match->dblen); printf("\t" Formatuint64_t "\t%lu\t%lu\t%lu\n", PRINTuint64_tcast(showmatchinfo->queryunit), match->querystartpos, match->querylen, match->distance); if (showmatchinfo->showalignment) { gt_alignment_show_with_mapped_chars( (const GtAlignment *) match->alignment, showmatchinfo->characters, showmatchinfo->wildcardshow, stdout); } }
/*
  Match callback that reports suffix-prefix overlaps between sequences of
  <encseq>.

  <pos1> and <pos2> are absolute positions of the two occurrences of a match
  of length <matchlen>. The positions are ordered so that the smaller one is
  handled first; both are then translated into (sequence number, relative
  position) pairs.

  A line "<suffix-seqnum> <prefix-seqnum> <matchlen>" is printed to stdout if
  one occurrence starts at relative position 0 of its sequence and the other
  occurrence ends exactly at the end of its sequence. When the occurrence at
  the smaller position starts a sequence, only that orientation is tested.

  <info> and <err> are not used. Always returns 0.
*/
static int gt_simplesuffixprefixmatchoutput(GT_UNUSED void *info,
                                            const GtEncseq *encseq,
                                            unsigned long matchlen,
                                            unsigned long pos1,
                                            unsigned long pos2,
                                            GT_UNUSED GtError *err)
{
  unsigned long lowpos = pos1, highpos = pos2,
                lowseqnum, highseqnum,
                lowseqstart, highseqstart,
                lowrelpos, highrelpos;

  /* order the two occurrences by absolute position */
  if (lowpos > highpos)
  {
    lowpos = pos2;
    highpos = pos1;
  }

  /* occurrence at the smaller position */
  lowseqnum = gt_encseq_seqnum(encseq, lowpos);
  lowseqstart = gt_encseq_seqstartpos(encseq, lowseqnum);
  gt_assert(lowseqstart <= lowpos);
  lowrelpos = lowpos - lowseqstart;

  /* occurrence at the larger position */
  highseqnum = gt_encseq_seqnum(encseq, highpos);
  highseqstart = gt_encseq_seqstartpos(encseq, highseqnum);
  gt_assert(highseqstart <= highpos);
  highrelpos = highpos - highseqstart;

  if (lowrelpos == 0)
  {
    /* lower occurrence is a prefix: is the higher one a suffix? */
    if (highrelpos + matchlen == gt_encseq_seqlength(encseq, highseqnum))
    {
      printf("%lu %lu %lu\n", highseqnum, lowseqnum, matchlen);
    }
  } else if (highrelpos == 0)
  {
    /* higher occurrence is a prefix: is the lower one a suffix? */
    if (lowrelpos + matchlen == gt_encseq_seqlength(encseq, lowseqnum))
    {
      printf("%lu %lu %lu\n", lowseqnum, highseqnum, matchlen);
    }
  }
  return 0;
}
/*
  Lua binding: returns the number of the sequence containing a position.

  Lua arguments:
    1: encoded sequence object (checked via check_encseq())
    2: position, a number in the range [0, total length of the encseq)

  Pushes one number (the sequence number) and returns 1. An argument error
  is raised if the position is negative, not a number (NaN) or not smaller
  than the total length.

  The position is validated while it is still a floating-point value:
  converting a negative or too large floating-point value to an unsigned
  integer type is undefined behaviour in C, so the range check must happen
  before the conversion, not after it.
*/
static int encseq_lua_seqnum(lua_State *L)
{
  GtEncseq **encseq;
  GtUword pos;
  double posarg; /* NOTE(review): assumes lua_Number is double (stock Lua) */

  encseq = check_encseq(L, 1);
  posarg = luaL_checknumber(L, 2);
  /* NaN fails this comparison as well and is therefore rejected */
  luaL_argcheck(L, posarg >= 0, 2, "must not be negative");
  luaL_argcheck(L, posarg < (double) gt_encseq_total_length(*encseq), 2,
                "cannot exceed total length of encoded sequence");
  /* now known to be in range, so the truncating conversion is well defined */
  pos = (GtUword) posarg;
  lua_pushnumber(L, gt_encseq_seqnum(*encseq, pos));
  return 1;
}
/*
  Records that the database sequence hit by <match> has at least one match.

  <info> is a Storematchinfo. The sequence number is derived from the
  absolute database start position if match->dbabsolute is set, otherwise
  the sequence number stored in the match is used. The corresponding bit in
  the hasmatch bit table is set unless it is set already.
*/
static void storematch(void *info,const GtIdxMatch *match)
{
  Storematchinfo *state = (Storematchinfo *) info;
  unsigned long hitseqnum;

  if (!match->dbabsolute)
  {
    hitseqnum = match->dbseqnum;
  } else
  {
    hitseqnum = gt_encseq_seqnum(state->encseq, match->dbstartpos);
  }
  if (GT_ISIBITSET(state->hasmatch,hitseqnum))
  {
    return; /* already marked */
  }
  GT_SETIBIT(state->hasmatch,hitseqnum);
}
/*
  Leaf-edge callback of the bottom-up traversal.

  A leaf is handed to processcontained() (together with the number of the
  sequence containing <leafnumber>) if all of the following hold:
  - <fatherdepth> is at least state->shortest,
  - <leafnumber> is 0 or the bit for position <leafnumber> - 1 is set in
    state->sspbittab,
  - the bit for position <leafnumber> + <fatherdepth> is set in
    state->sspbittab.
  NOTE(review): sspbittab presumably marks sequence separator positions, so
  the test selects suffixes spanning a whole sequence - confirm.

  The progress counter is advanced for every leaf if the progress bar is
  enabled. <firstsucc>, <father> and <err> are not used. Always returns 0.
*/
static inline int processleafedge_rdjcv(GT_UNUSED bool firstsucc,
                                        unsigned long fatherdepth,
                                        GT_UNUSED GtBUinfo_rdjcv *father,
                                        unsigned long leafnumber,
                                        GtBUstate_rdjcv *state,
                                        GT_UNUSED GtError *err)
{
  if (fatherdepth >= state->shortest)
  {
    /* the bit tests are nested to keep the original evaluation order: the
       second table lookup only happens if the first condition holds */
    if (leafnumber == 0 || GT_ISIBITSET(state->sspbittab, leafnumber-1))
    {
      if (GT_ISIBITSET(state->sspbittab, leafnumber + fatherdepth))
      {
        unsigned long containedseqnum
          = gt_encseq_seqnum(state->encseq, leafnumber);

        processcontained(containedseqnum, state);
      }
    }
  }
  if (state->show_progressbar)
  {
    state->progress++;
  }
  return 0;
}
/*
  Match callback for exact self matches.

  <info> is interpreted as a GtQuerymatch that is filled and then output.
  <genericencseq> must carry an encseq. The smaller of <pos1>/<pos2> becomes
  the database start position, the larger one is treated as the query
  occurrence: it is translated into the number of the sequence containing it
  and a position relative to the start of that sequence. Both database and
  query length are <len>.

  Returns the result of gt_querymatch_output().
*/
static int gt_simpleexactselfmatchoutput(void *info,
                                         const GtGenericEncseq *genericencseq,
                                         GtUword len,
                                         GtUword pos1,
                                         GtUword pos2,
                                         GT_UNUSED GtError *err)
{
  GtQuerymatch *querymatch = (GtQuerymatch *) info;
  const GtEncseq *encseq;
  GtUword dbpos = pos1,
          querypos = pos2,
          queryseqnum,
          queryseqstart,
          queryseqlength;

  if (dbpos > querypos)
  {
    dbpos = pos2;
    querypos = pos1;
  }
  gt_assert(genericencseq != NULL && genericencseq->hasencseq);
  encseq = genericencseq->seqptr.encseq;

  queryseqnum = gt_encseq_seqnum(encseq, querypos);
  queryseqstart = gt_encseq_seqstartpos(encseq, queryseqnum);
  queryseqlength = gt_encseq_seqlength(encseq, queryseqnum);
  gt_assert(querypos >= queryseqstart);
  gt_querymatch_fill(querymatch,
                     len,                      /* database length */
                     dbpos,                    /* database start */
                     GT_READMODE_FORWARD,
                     false,
                     0,
                     0,
                     true,
                     (uint64_t) queryseqnum,
                     len,                      /* query length */
                     querypos - queryseqstart  /* relative query start */);
  return gt_querymatch_output(info,
                              encseq,
                              querymatch,
                              NULL,
                              queryseqlength,
                              err);
}
static void gt_querysubstringmatch(bool selfmatch, const GtEncseq *dbencseq, const ESASuffixptr *suftabpart, GtReadmode readmode, GtUword numberofsuffixes, uint64_t queryunitnum, GtQueryrepresentation *queryrep, GtUword minmatchlength, GtProcessquerymatch processquerymatch, void *processquerymatchinfo, GtQuerymatch *querymatchspaceptr) { GtMMsearchiterator *mmsi; GtUword totallength, localqueryoffset = 0; uint64_t localqueryunitnum = queryunitnum; GtQuerysubstring querysubstring; gt_assert(numberofsuffixes > 0); totallength = gt_encseq_total_length(dbencseq); querysubstring.queryrep = queryrep; for (querysubstring.currentoffset = 0; querysubstring.currentoffset <= queryrep->seqlen - minmatchlength; querysubstring.currentoffset++) { GtUword dbstart; mmsi = gt_mmsearchiterator_new(dbencseq, suftabpart, 0, /* leftbound */ numberofsuffixes - 1, /* rightbound */ 0, /* offset */ readmode, &querysubstring, minmatchlength); while (gt_mmsearchiterator_next(&dbstart,mmsi)) { if (gt_mmsearch_isleftmaximal(dbencseq, readmode, dbstart, &querysubstring)) { GtUword dbseqnum, dbseqstartpos, dbseqlen, extend; extend = gt_mmsearch_extendright(dbencseq, mmsi->esr, readmode, totallength, dbstart + minmatchlength, &querysubstring, minmatchlength); if (gt_encseq_has_multiseq_support(dbencseq)) { dbseqnum = gt_encseq_seqnum(dbencseq,dbstart); dbseqstartpos = gt_encseq_seqstartpos(dbencseq,dbseqnum); dbseqlen = gt_encseq_seqlength(dbencseq,dbseqnum); } else { dbseqnum = dbseqstartpos = dbseqlen = 0; } gt_querymatch_init(querymatchspaceptr, minmatchlength + extend, dbstart, dbseqnum, dbstart - dbseqstartpos, dbseqlen, 0, /* score */ 0, /* edist */ selfmatch, localqueryunitnum, minmatchlength + extend, localqueryoffset, queryrep->seqlen); processquerymatch(processquerymatchinfo,querymatchspaceptr); } } gt_mmsearchiterator_delete(mmsi); mmsi = NULL; if (gt_mmsearch_accessquery(queryrep,querysubstring.currentoffset) == (GtUchar) SEPARATOR) { localqueryunitnum++; localqueryoffset = 0; } else { 
localqueryoffset++; } } }
/*
  Enumerates unique matches of length at least <minmatchlength> between the
  query <queryrep> and the sequence indexed by <suffixarray>.

  For every query offset, gt_suffixarrayfindmums() is asked for a match of
  the query suffix starting at that offset. If it delivers a database start
  position (i.e. not ULONG_MAX), the match is long enough and it is left
  maximal, the match is stored in <querymatchspaceptr> - including number,
  relative start and length of the database sequence containing it - and
  passed to <processquerymatch> together with <processquerymatchinfo>.

  <queryunitnum> is the number of the first query unit; whenever a SEPARATOR
  is seen at the current query offset, the unit number is incremented and the
  unit-relative offset is reset to 0.

  Requires !<selfmatch> and queryrep->seqlen >= <minmatchlength>.
*/
void gt_queryuniquematch(bool selfmatch,
                         const Suffixarray *suffixarray,
                         uint64_t queryunitnum,
                         GtQueryrepresentation *queryrep,
                         GtUword minmatchlength,
                         GtProcessquerymatch processquerymatch,
                         void *processquerymatchinfo,
                         GtQuerymatch *querymatchspaceptr)
{
  const GtUword totallength = gt_encseq_total_length(suffixarray->encseq);
  GtUword queryoffset, unitoffset = 0;
  uint64_t unitnum = queryunitnum;

  gt_assert(!selfmatch && queryrep->seqlen >= minmatchlength);
  for (queryoffset = 0;
       queryoffset <= queryrep->seqlen - minmatchlength;
       queryoffset++)
  {
    GtUword mumlen, dbstart;

    mumlen = gt_suffixarrayfindmums(suffixarray,
                                    0,
                                    0, /* leftbound */
                                    totallength, /* rightbound */
                                    &dbstart,
                                    queryrep->sequence + queryoffset,
                                    queryrep->sequence + queryrep->seqlen);
    if (dbstart != ULONG_MAX && mumlen >= minmatchlength)
    {
      if (gt_mum_isleftmaximal(suffixarray->encseq,
                               suffixarray->readmode,
                               dbstart,
                               queryoffset,
                               queryrep->sequence))
      {
        GtUword dbseqnum, dbseqstartpos, dbseqlen;

        dbseqnum = gt_encseq_seqnum(suffixarray->encseq,dbstart);
        dbseqstartpos = gt_encseq_seqstartpos(suffixarray->encseq,dbseqnum);
        dbseqlen = gt_encseq_seqlength(suffixarray->encseq,dbseqnum);
        gt_querymatch_init(querymatchspaceptr,
                           mumlen,
                           dbstart,
                           dbseqnum,
                           dbstart - dbseqstartpos,
                           dbseqlen,
                           0, /* score */
                           0, /* edist */
                           selfmatch,
                           unitnum,
                           mumlen,
                           unitoffset,
                           queryrep->seqlen);
        processquerymatch(processquerymatchinfo,querymatchspaceptr);
      }
    }
    /* keep track of the query unit and the offset within that unit */
    if (queryrep->sequence[queryoffset] != (GtUchar) SEPARATOR)
    {
      unitoffset++;
    } else
    {
      unitnum++;
      unitoffset = 0;
    }
  }
}
static int gt_simplexdropselfmatchoutput(void *info, const GtGenericEncseq *genericencseq, GtUword len, GtUword pos1, GtUword pos2, GtError *err) { GtXdropmatchinfo *xdropmatchinfo = (GtXdropmatchinfo *) info; GtXdropscore score; GtUword dbseqnum, dbseqstartpos, dbseqlength, dbstart, dblen, querystart, queryseqnum, querylen, queryseqlength, queryseqstartpos, dbtotallength; const GtEncseq *encseq; gt_assert(genericencseq != NULL && genericencseq->hasencseq); encseq = genericencseq->seqptr.encseq; dbtotallength = gt_encseq_total_length(encseq); if (pos1 > pos2) { GtUword tmp = pos1; pos1 = pos2; pos2 = tmp; } dbseqnum = gt_encseq_seqnum(encseq,pos1), dbseqstartpos = gt_encseq_seqstartpos(encseq,dbseqnum), dbseqlength = gt_encseq_seqlength(encseq,dbseqnum); if (pos2 < dbseqstartpos + dbseqlength) { queryseqnum = dbseqnum; queryseqstartpos = dbseqstartpos; queryseqlength = dbseqlength; } else { queryseqnum = gt_encseq_seqnum(encseq,pos2); gt_assert(dbseqnum < queryseqnum); queryseqstartpos = gt_encseq_seqstartpos(encseq,queryseqnum); queryseqlength = gt_encseq_seqlength(encseq,queryseqnum); } if (pos1 > 0 && pos2 > 0) { gt_assert(pos1 >= dbseqstartpos && pos2 >= queryseqstartpos); gt_seqabstract_reinit_encseq(xdropmatchinfo->useq,encseq, pos1 - dbseqstartpos,0); gt_seqabstract_reinit_encseq(xdropmatchinfo->vseq,encseq, pos2 - queryseqstartpos,0); gt_evalxdroparbitscoresextend(false, &xdropmatchinfo->best_left, xdropmatchinfo->res, xdropmatchinfo->useq, xdropmatchinfo->vseq, pos1, pos2, xdropmatchinfo->belowscore); } else { xdropmatchinfo->best_left.ivalue = 0; xdropmatchinfo->best_left.jvalue = 0; xdropmatchinfo->best_left.score = 0; } if (pos1 + len < dbtotallength && pos2 + len < dbtotallength) { const GtUword seqend1 = dbseqstartpos + dbseqlength; const GtUword seqend2 = queryseqstartpos + queryseqlength; gt_assert(seqend1 >= pos1 + len && seqend2 >= pos2 + len); gt_seqabstract_reinit_encseq(xdropmatchinfo->useq, encseq,seqend1 - (pos1 + len),0); 
gt_seqabstract_reinit_encseq(xdropmatchinfo->vseq, encseq,seqend2 - (pos2 + len),0); gt_evalxdroparbitscoresextend(true, &xdropmatchinfo->best_right, xdropmatchinfo->res, xdropmatchinfo->useq, xdropmatchinfo->vseq, pos1 + len, pos2 + len, xdropmatchinfo->belowscore); } else { xdropmatchinfo->best_right.ivalue = 0; xdropmatchinfo->best_right.jvalue = 0; xdropmatchinfo->best_right.score = 0; } gt_assert(pos1 >= (GtUword) xdropmatchinfo->best_left.ivalue && pos2 >= (GtUword) xdropmatchinfo->best_left.jvalue); querystart = pos2 - xdropmatchinfo->best_left.jvalue; gt_assert(querystart >= queryseqstartpos); dblen = len + xdropmatchinfo->best_left.ivalue + xdropmatchinfo->best_right.ivalue; dbstart = pos1 - xdropmatchinfo->best_left.ivalue; querylen = len + xdropmatchinfo->best_left.jvalue + xdropmatchinfo->best_right.jvalue, score = (GtXdropscore) len * xdropmatchinfo->arbitscores.mat + xdropmatchinfo->best_left.score + xdropmatchinfo->best_right.score; gt_seqabstract_reinit_encseq(xdropmatchinfo->useq, encseq, dblen, dbstart); gt_seqabstract_reinit_encseq(xdropmatchinfo->vseq, encseq, querylen, querystart); gt_querymatch_fill(xdropmatchinfo->querymatchspaceptr, dblen, dbstart, GT_READMODE_FORWARD, false, score, greedyunitedist(xdropmatchinfo->frontresource, xdropmatchinfo->useq,xdropmatchinfo->vseq), true, (uint64_t) queryseqnum, querylen, querystart - queryseqstartpos); return gt_querymatch_output(info, encseq, xdropmatchinfo->querymatchspaceptr, NULL, gt_encseq_seqlength(encseq, queryseqnum), err); }
static int gt_processxdropquerymatches(void *info, const GtEncseq *encseq, const GtQuerymatch *querymatch, const GtUchar *query, GtUword query_totallength, GtError *err) { GtXdropmatchinfo *xdropmatchinfo = (GtXdropmatchinfo *) info; GtXdropscore score; GtUword querystart, dblen, dbstart, querylen; GtUword pos1 = gt_querymatch_dbstart(querymatch); GtUword pos2 = gt_querymatch_querystart(querymatch); GtUword len = gt_querymatch_querylen(querymatch); const GtUword dbtotallength = gt_encseq_total_length(encseq); uint64_t queryseqnum; GtUword dbseqnum, dbseqstartpos, dbseqlength; dbseqnum = gt_encseq_seqnum(encseq,pos1); dbseqstartpos = gt_encseq_seqstartpos(encseq,dbseqnum); dbseqlength = gt_encseq_seqlength(encseq,dbseqnum); if (pos1 > 0 && pos2 > 0) { gt_assert(dbseqstartpos < pos1); gt_seqabstract_reinit_encseq(xdropmatchinfo->useq,encseq, pos1 - dbseqstartpos,0); gt_seqabstract_reinit_gtuchar(xdropmatchinfo->vseq, query, pos2, 0); gt_evalxdroparbitscoresextend(false, &xdropmatchinfo->best_left, xdropmatchinfo->res, xdropmatchinfo->useq, xdropmatchinfo->vseq, pos1, pos2, xdropmatchinfo->belowscore); } else { xdropmatchinfo->best_left.ivalue = 0; xdropmatchinfo->best_left.jvalue = 0; xdropmatchinfo->best_left.score = 0; } if (pos1 + len < dbtotallength && pos2 + len < query_totallength) { gt_seqabstract_reinit_encseq(xdropmatchinfo->useq, encseq,dbseqstartpos + dbseqlength - (pos1 + len),0); gt_seqabstract_reinit_gtuchar(xdropmatchinfo->vseq, query,query_totallength - (pos2 + len), 0); gt_evalxdroparbitscoresextend(true, &xdropmatchinfo->best_right, xdropmatchinfo->res, xdropmatchinfo->useq, xdropmatchinfo->vseq, pos1 + len, pos2 + len, xdropmatchinfo->belowscore); } else { xdropmatchinfo->best_right.ivalue = 0; xdropmatchinfo->best_right.jvalue = 0; xdropmatchinfo->best_right.score = 0; } gt_assert(pos1 >= (GtUword) xdropmatchinfo->best_left.ivalue && pos2 >= (GtUword) xdropmatchinfo->best_left.jvalue); querystart = pos2 - xdropmatchinfo->best_left.jvalue; 
queryseqnum = gt_querymatch_queryseqnum(querymatch); dblen = len + xdropmatchinfo->best_left.ivalue + xdropmatchinfo->best_right.ivalue; dbstart = pos1 - xdropmatchinfo->best_left.ivalue; querylen = len + xdropmatchinfo->best_left.jvalue + xdropmatchinfo->best_right.jvalue, score = (GtXdropscore) len * xdropmatchinfo->arbitscores.mat + xdropmatchinfo->best_left.score + xdropmatchinfo->best_right.score; gt_seqabstract_reinit_encseq(xdropmatchinfo->useq, encseq, dblen, dbstart); gt_seqabstract_reinit_gtuchar(xdropmatchinfo->vseq, query, querylen, querystart); gt_querymatch_fill(xdropmatchinfo->querymatchspaceptr, dblen, dbstart, GT_READMODE_FORWARD, false, score, greedyunitedist(xdropmatchinfo->frontresource, xdropmatchinfo->useq,xdropmatchinfo->vseq), false, queryseqnum, querylen, querystart); return gt_querymatch_output(info, encseq, xdropmatchinfo->querymatchspaceptr, query, query_totallength, err); }
/*
  Runner of the seqorder tool: loads the encoded sequence named by
  argv[parsed_args] and outputs its sequences in the order selected by the
  tool arguments:
  - invert:  from the last sequence to the first,
  - shuffle: in the order delivered by gt_seqorder_get_shuffled_seqnums(),
  - sort / revsort: in the order computed by gt_seqorder_sort(), ascending or
    descending, respectively.

  A warning is issued if the encoded sequence has no description support.
  Returns 0 on success and -1 if the encoded sequence cannot be loaded (in
  which case the loader is expected to have set <err>).
*/
static int gt_seqorder_runner(GT_UNUSED int argc,
                              const char **argv,
                              int parsed_args,
                              void *tool_arguments,
                              GtError *err)
{
  GtSeqorderArguments *arguments = tool_arguments;
  GtEncseqLoader *loader;
  GtEncseq *encseq;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);

  /* load encseq */
  loader = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(loader, argv[parsed_args], err);
  if (encseq == NULL)
  {
    had_err = -1;
  } else
  {
    unsigned long idx, nofseqs;

    if (!gt_encseq_has_description_support(encseq))
    {
      gt_warning("%s has no description support", argv[parsed_args]);
    }
    nofseqs = gt_encseq_num_of_sequences(encseq);

    if (arguments->invert)
    {
      idx = nofseqs;
      while (idx > 0)
      {
        idx--;
        gt_seqorder_output(idx, encseq);
      }
    } else if (arguments->shuffle)
    {
      unsigned long *shuffled = gt_malloc(sizeof (unsigned long) * nofseqs);

      gt_seqorder_get_shuffled_seqnums(nofseqs, shuffled);
      for (idx = 0; idx < nofseqs; idx++)
      {
        gt_seqorder_output(shuffled[idx], encseq);
      }
      gt_free(shuffled);
    } else
    {
      GtSuffixsortspace *suffixsortspace;
      unsigned long sortedpos;

      gt_assert(arguments->sort || arguments->revsort);
      /* NOTE(review): the original code carries the remark "Use iterator
         over sequence separators: saves a lot of binary searches" at the
         call below; kept here as it reads like an open improvement idea */
      suffixsortspace
        = gt_suffixsortspace_new(nofseqs,
                                 gt_encseq_seqstartpos(encseq, nofseqs-1),
                                 false,
                                 NULL);
      gt_seqorder_sort(suffixsortspace, encseq);
      if (arguments->sort)
      {
        for (idx = 0; idx < nofseqs; idx++)
        {
          sortedpos = gt_suffixsortspace_getdirect(suffixsortspace, idx);
          gt_seqorder_output(gt_encseq_seqnum(encseq, sortedpos), encseq);
        }
      } else
      {
        idx = nofseqs;
        while (idx > 0)
        {
          idx--;
          sortedpos = gt_suffixsortspace_getdirect(suffixsortspace, idx);
          gt_seqorder_output(gt_encseq_seqnum(encseq, sortedpos), encseq);
        }
      }
      gt_suffixsortspace_delete(suffixsortspace, false);
    }
  }
  gt_encseq_loader_delete(loader);
  gt_encseq_delete(encseq);
  return had_err;
}
/*
  Returns the number of the sequence in <encseq> which contains the database
  start position stored in <querymatch>.
*/
GtUword gt_querymatch_dbseqnum(const GtEncseq *encseq,
                               const GtQuerymatch *querymatch)
{
  const GtUword seqnum = gt_encseq_seqnum(encseq, querymatch->dbstart);

  return seqnum;
}