/*
  Map the suffix array stored under <indexname> (tables ESQ, SUF and SSP)
  and use every sequence of its encoded sequence as a query against that
  same index. The query is read in direction <queryreadmode>, which must not
  be GT_READMODE_FORWARD. Sequences shorter than <userdefinedleastlength>
  are not searched. Matches are passed to <processquerymatch> together with
  <processquerymatchinfo>.

  Returns 0 on success and -1 if mapping the index or any match run failed;
  in the latter case processing stops at the first failing sequence.
*/
int gt_callenumselfmatches(const char *indexname,
                           GtReadmode queryreadmode,
                           unsigned int userdefinedleastlength,
                           GtProcessquerymatch processquerymatch,
                           void *processquerymatchinfo,
                           GtLogger *logger,
                           GtError *err)
{
  Suffixarray sarr;
  int retcode = 0;

  gt_assert(queryreadmode != GT_READMODE_FORWARD);
  if (gt_mapsuffixarray(&sarr,
                        SARR_ESQTAB | SARR_SUFTAB | SARR_SSPTAB,
                        indexname,
                        logger,
                        err) == 0)
  {
    const unsigned long minlen = (unsigned long) userdefinedleastlength;
    GtQuerymatch *matchspace = gt_querymatch_new();
    unsigned long total, idx;
    GtQueryrep rep;

    total = gt_encseq_num_of_sequences(sarr.encseq);
    /* the query lives inside the index itself, so no plain byte sequence */
    rep.sequence = NULL;
    rep.reversecopy = false;
    rep.encseq = sarr.encseq;
    rep.readmode = queryreadmode;
    for (idx = 0; idx < total; idx++)
    {
      const unsigned long start = gt_encseq_seqstartpos(sarr.encseq, idx);
      const unsigned long len = gt_encseq_seqlength(sarr.encseq, idx);

      if (len < minlen)
      {
        continue; /* too short to contain a match of the required length */
      }
      rep.startpos = start;
      rep.length = len;
      if (gt_querysubstringmatch(true,
                                 &sarr,
                                 (uint64_t) idx,
                                 &rep,
                                 minlen,
                                 processquerymatch,
                                 processquerymatchinfo,
                                 matchspace,
                                 err) != 0)
      {
        retcode = -1;
        break;
      }
    }
    gt_querymatch_delete(matchspace);
  } else
  {
    retcode = -1;
  }
  /* released on both paths, exactly as the mapping call left it */
  gt_freesuffixarray(&sarr);
  return retcode;
}
static int gt_constructsarrandrunmmsearch( const GtEncseq *dbencseq, GtReadmode readmode, unsigned int prefixlength, unsigned int numofparts, GtUword maximumspace, const GtUchar *query, GtUword querylen, unsigned int minlength, GtProcessquerymatch processquerymatch, void *processquerymatchinfo, GtTimer *sfxprogress, bool withprogressbar, GtLogger *logger, GtError *err) { bool haserr = false; Sfxiterator *sfi; Sfxstrategy sfxstrategy; defaultsfxstrategy(&sfxstrategy, gt_encseq_bitwise_cmp_ok(dbencseq) ? false : true); sfxstrategy.outsuftabonfile = false; sfi = gt_Sfxiterator_new(dbencseq, readmode, prefixlength, numofparts, maximumspace, &sfxstrategy, /* sfxstrategy */ sfxprogress, withprogressbar, logger, /* logger */ err); if (sfi == NULL) { haserr = true; } else { const GtSuffixsortspace *suffixsortspace; GtUword numberofsuffixes; GtQuerymatch *querymatchspaceptr = gt_querymatch_new(); GtQueryrepresentation queryrep; queryrep.sequence = query; queryrep.encseq = NULL; queryrep.readmode = GT_READMODE_FORWARD; queryrep.startpos = 0; queryrep.seqlen = querylen; while (true) { suffixsortspace = gt_Sfxiterator_next(&numberofsuffixes,NULL,sfi); if (suffixsortspace == NULL) { break; } gt_querysubstringmatch(false, dbencseq, (const ESASuffixptr *) gt_suffixsortspace_ulong_get(suffixsortspace), readmode, numberofsuffixes, 0, &queryrep, (GtUword) minlength, processquerymatch, processquerymatchinfo, querymatchspaceptr); } gt_querymatch_delete(querymatchspaceptr); } if (gt_Sfxiterator_delete(sfi,err) != 0) { haserr = true; } return haserr ? -1 : 0; }
/*
  Runner of the repfind tool; <tool_arguments> is a Maxpairsoptions object.

  After rejecting superfluous command line arguments, exactly one of three
  modes is executed:
  - query files given: match the queries against the index
    (gt_callenumquerymatches), with xdrop extension if extendseed is set;
  - no query files, samples != 0: run gt_testmaxpairs;
  - no query files, samples == 0: enumerate forward maximal pairs and/or
    reverse self matches, depending on the forward and reverse options.

  Returns 0 on success and -1 on error.
*/
static int gt_repfind_runner(GT_UNUSED int argc,
                             GT_UNUSED const char **argv,
                             GT_UNUSED int parsed_args,
                             void *tool_arguments,
                             GtError *err)
{
  Maxpairsoptions *opts = tool_arguments;
  GtQuerymatch *matchspace = gt_querymatch_new();
  GtXdropmatchinfo xdropinfo;
  GtLogger *verboselog = NULL;
  int retcode = 0;

  gt_error_check(err);
  /* resources and fixed scoring scheme for the xdrop extension */
  xdropinfo.querymatchspaceptr = matchspace;
  xdropinfo.useq = gt_seqabstract_new_empty();
  xdropinfo.vseq = gt_seqabstract_new_empty();
  xdropinfo.arbitscores.mat = 2;
  xdropinfo.arbitscores.mis = -2;
  xdropinfo.arbitscores.ins = -3;
  xdropinfo.arbitscores.del = -3;
  xdropinfo.frontresource = gt_frontresource_new(100UL);
  xdropinfo.res = gt_xdrop_resources_new(&xdropinfo.arbitscores);
  xdropinfo.belowscore = 5L;
  verboselog = gt_logger_new(opts->beverbose, GT_LOGGER_DEFLT_PREFIX, stdout);

  if (parsed_args < argc)
  {
    gt_error_set(err,"superfluous arguments: \"%s\"",argv[argc-1]);
    retcode = -1;
  } else if (gt_str_array_size(opts->queryfiles) != 0)
  {
    /* query mode */
    if (gt_callenumquerymatches(gt_str_get(opts->indexname),
                                opts->queryfiles,
                                false,
                                true,
                                false,
                                opts->userdefinedleastlength,
                                NULL,
                                opts->extendseed
                                  ? gt_processxdropquerymatches
                                  : gt_querymatch_output,
                                opts->extendseed
                                  ? (void *) &xdropinfo
                                  : NULL,
                                verboselog,
                                err) != 0)
    {
      retcode = -1;
    }
  } else if (opts->samples != 0)
  {
    /* test mode */
    if (gt_testmaxpairs(gt_str_get(opts->indexname),
                        opts->samples,
                        opts->userdefinedleastlength,
                        (GtUword) (100 * opts->userdefinedleastlength),
                        verboselog,
                        err) != 0)
    {
      retcode = -1;
    }
  } else
  {
    if (opts->forward)
    {
      GtProcessmaxpairs pairhandler;
      void *pairhandlerdata;

      if (opts->searchspm)
      {
        pairhandler = gt_simplesuffixprefixmatchoutput;
        pairhandlerdata = NULL;
      } else if (opts->extendseed)
      {
        pairhandler = gt_simplexdropselfmatchoutput;
        pairhandlerdata = (void *) &xdropinfo;
      } else
      {
        pairhandler = gt_simpleexactselfmatchoutput;
        pairhandlerdata = (void *) matchspace;
      }
      if (gt_callenummaxpairs(gt_str_get(opts->indexname),
                              opts->userdefinedleastlength,
                              opts->scanfile,
                              pairhandler,
                              pairhandlerdata,
                              verboselog,
                              err) != 0)
      {
        retcode = -1;
      }
    }
    if (retcode == 0 && opts->reverse)
    {
      /* Reverse self matches are always reported with the plain output
         function and no extra data; the xdrop-extending variant is not
         used here, even if extendseed is set. */
      if (gt_callenumselfmatches(gt_str_get(opts->indexname),
                                 GT_READMODE_REVERSE,
                                 opts->userdefinedleastlength,
                                 gt_querymatch_output,
                                 NULL,
                                 verboselog,
                                 err) != 0)
      {
        retcode = -1;
      }
    }
  }
  gt_querymatch_delete(matchspace);
  gt_seqabstract_delete(xdropinfo.useq);
  gt_seqabstract_delete(xdropinfo.vseq);
  gt_xdrop_resources_delete(xdropinfo.res);
  gt_frontresource_delete(xdropinfo.frontresource);
  gt_logger_delete(verboselog);
  return retcode;
}
static int gt_callenumquerymatches_withindex( GtQuerysubstringmatchfunc findquerymatches, const Suffixarray *suffixarray, const GtStrArray *queryfiles, bool forwardstrand, bool reversestrand, unsigned int userdefinedleastlength, GtProcessquerybeforematching processquerybeforematching, GtProcessquerymatch processquerymatch, void *processquerymatchinfo, GtError *err) { GtSeqIterator *seqit; bool haserr = false; seqit = gt_seq_iterator_sequence_buffer_new(queryfiles, err); if (seqit == NULL) { haserr = true; } else { GtQuerymatch *querymatchspaceptr = gt_querymatch_new(); const GtUchar *query; unsigned long querylen; int retval; uint64_t queryunitnum; GtUchar *queryreverse = NULL; unsigned long queryreverse_length = 0; char *desc = NULL; int mode; gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(gt_encseq_alphabet( suffixarray->encseq))); for (queryunitnum = 0; /* Nothing */; queryunitnum++) { retval = gt_seq_iterator_next(seqit, &query, &querylen, &desc, err); if (retval < 0) { haserr = true; break; } if (retval == 0) { break; } if (querylen >= (unsigned long) userdefinedleastlength) { GtQueryrep queryrep; queryrep.encseq = NULL; queryrep.readmode = GT_READMODE_FORWARD; queryrep.startpos = 0; queryrep.length = querylen; for (mode = 0; mode <= 1; mode++) { if (mode == 0 && forwardstrand) { queryrep.sequence = query; queryrep.reversecopy = false; if (processquerybeforematching != NULL) { processquerybeforematching(processquerymatchinfo,desc,query, querylen,true); } } else { if (mode == 1 && reversestrand) { if (querylen > queryreverse_length) { queryreverse = gt_realloc(queryreverse, sizeof (*queryreverse) * querylen); queryreverse_length = querylen; } gt_copy_reversecomplement(queryreverse,query,querylen); queryrep.sequence = queryreverse; queryrep.reversecopy = true; if (processquerybeforematching != NULL) { processquerybeforematching(processquerymatchinfo,desc, queryreverse,querylen,false); } } else { queryrep.sequence = NULL; queryrep.reversecopy = false; 
} } if (queryrep.sequence != NULL) { int ret = findquerymatches(false, suffixarray, queryunitnum, &queryrep, (unsigned long) userdefinedleastlength, processquerymatch, processquerymatchinfo, querymatchspaceptr, err); if (ret != 0) { haserr = true; break; } } } } } gt_seq_iterator_delete(seqit); gt_free(queryreverse); gt_querymatch_delete(querymatchspaceptr); } return haserr ? -1 : 0; }