/*
  Match callback that appends one Substringmatch record to a table.

  info        the GtArray receiving the records (cast from void *).
  querymatch  the match whose query length, database start, query start
              and query sequence number are copied into the record.

  The remaining parameters are unused. Always returns 0.
*/
static int storemaxmatchquery(void *info,
                              GT_UNUSED const GtEncseq *encseq,
                              const GtQuerymatch *querymatch,
                              GT_UNUSED const GtUchar *query,
                              GT_UNUSED GtUword query_totallength,
                              GT_UNUSED GtError *err)
{
  GtArray *matchtable = (GtArray *) info;
  Substringmatch record;

  /* Fields are assigned one by one; any other members of the record are
     left untouched. */
  record.len = gt_querymatch_querylen(querymatch);
  record.dbstart = gt_querymatch_dbstart(querymatch);
  record.querystart = gt_querymatch_querystart(querymatch);
  record.queryseqnum = gt_querymatch_queryseqnum(querymatch);

  gt_array_add(matchtable, record);
  return 0;
}
static int gt_processxdropquerymatches(void *info, const GtEncseq *encseq, const GtQuerymatch *querymatch, const GtUchar *query, GtUword query_totallength, GtError *err) { GtXdropmatchinfo *xdropmatchinfo = (GtXdropmatchinfo *) info; GtXdropscore score; GtUword querystart, dblen, dbstart, querylen; GtUword pos1 = gt_querymatch_dbstart(querymatch); GtUword pos2 = gt_querymatch_querystart(querymatch); GtUword len = gt_querymatch_querylen(querymatch); const GtUword dbtotallength = gt_encseq_total_length(encseq); uint64_t queryseqnum; GtUword dbseqnum, dbseqstartpos, dbseqlength; dbseqnum = gt_encseq_seqnum(encseq,pos1); dbseqstartpos = gt_encseq_seqstartpos(encseq,dbseqnum); dbseqlength = gt_encseq_seqlength(encseq,dbseqnum); if (pos1 > 0 && pos2 > 0) { gt_assert(dbseqstartpos < pos1); gt_seqabstract_reinit_encseq(xdropmatchinfo->useq,encseq, pos1 - dbseqstartpos,0); gt_seqabstract_reinit_gtuchar(xdropmatchinfo->vseq, query, pos2, 0); gt_evalxdroparbitscoresextend(false, &xdropmatchinfo->best_left, xdropmatchinfo->res, xdropmatchinfo->useq, xdropmatchinfo->vseq, pos1, pos2, xdropmatchinfo->belowscore); } else { xdropmatchinfo->best_left.ivalue = 0; xdropmatchinfo->best_left.jvalue = 0; xdropmatchinfo->best_left.score = 0; } if (pos1 + len < dbtotallength && pos2 + len < query_totallength) { gt_seqabstract_reinit_encseq(xdropmatchinfo->useq, encseq,dbseqstartpos + dbseqlength - (pos1 + len),0); gt_seqabstract_reinit_gtuchar(xdropmatchinfo->vseq, query,query_totallength - (pos2 + len), 0); gt_evalxdroparbitscoresextend(true, &xdropmatchinfo->best_right, xdropmatchinfo->res, xdropmatchinfo->useq, xdropmatchinfo->vseq, pos1 + len, pos2 + len, xdropmatchinfo->belowscore); } else { xdropmatchinfo->best_right.ivalue = 0; xdropmatchinfo->best_right.jvalue = 0; xdropmatchinfo->best_right.score = 0; } gt_assert(pos1 >= (GtUword) xdropmatchinfo->best_left.ivalue && pos2 >= (GtUword) xdropmatchinfo->best_left.jvalue); querystart = pos2 - xdropmatchinfo->best_left.jvalue; 
queryseqnum = gt_querymatch_queryseqnum(querymatch); dblen = len + xdropmatchinfo->best_left.ivalue + xdropmatchinfo->best_right.ivalue; dbstart = pos1 - xdropmatchinfo->best_left.ivalue; querylen = len + xdropmatchinfo->best_left.jvalue + xdropmatchinfo->best_right.jvalue, score = (GtXdropscore) len * xdropmatchinfo->arbitscores.mat + xdropmatchinfo->best_left.score + xdropmatchinfo->best_right.score; gt_seqabstract_reinit_encseq(xdropmatchinfo->useq, encseq, dblen, dbstart); gt_seqabstract_reinit_gtuchar(xdropmatchinfo->vseq, query, querylen, querystart); gt_querymatch_fill(xdropmatchinfo->querymatchspaceptr, dblen, dbstart, GT_READMODE_FORWARD, false, score, greedyunitedist(xdropmatchinfo->frontresource, xdropmatchinfo->useq,xdropmatchinfo->vseq), false, queryseqnum, querylen, querystart); return gt_querymatch_output(info, encseq, xdropmatchinfo->querymatchspaceptr, query, query_totallength, err); }
/*
  Tool runner: iterates over the matches delivered by a
  GtSeedextendMatchIterator created from arguments->matchfilename and,
  for each match, either re-runs the greedy extension on its seed, shows
  it via gt_show_seed_extend_encseq(), or recomputes and shows its
  alignment via gt_show_seed_extend_plain().

  tool_arguments  the GtShowSeedextArguments controlling the output
                  (must not be NULL).
  err             receives the error if the iterator cannot be created or
                  the greedy extension fails.

  argc, argv and parsed_args are unused.

  Returns 0 on success, -1 if the match iterator could not be created,
  otherwise the non-zero value returned by
  gt_greedy_extend_selfmatch_with_output().
*/
static int gt_show_seedext_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  int had_err = 0;
  GtUword alignmentwidth;
  GtShowSeedextArguments *arguments = tool_arguments;
  GtSeedextendMatchIterator *semi;

  gt_error_check(err);
  gt_assert(arguments != NULL);

  /* A width of 0 means that no alignment is displayed. */
  if (arguments->show_alignment)
  {
    alignmentwidth = 70;
  } else
  {
    alignmentwidth = 0;
  }

  /* Create the iterator over the match file; presumably this is where the
     option string in the first line of that file gets parsed. */
  semi = gt_seedextend_match_iterator_new(arguments->matchfilename, err);
  if (semi == NULL)
  {
    had_err = -1;
  } else
  {
    const GtEncseq *aencseq = gt_seedextend_match_iterator_aencseq(semi),
                   *bencseq = gt_seedextend_match_iterator_bencseq(semi);
    GtAlignment *alignment = gt_alignment_new();
    Polishing_info *pol_info = NULL;
    GtSequencepairbuffer seqpairbuf = {NULL,NULL,0,0};
    /* the following two are only relevant if seed_extend is set */
    GtGreedyextendmatchinfo *greedyextendmatchinfo = NULL;
    GtProcessinfo_and_querymatchspaceptr processinfo_and_querymatchspaceptr;
    const GtUchar *characters = gt_encseq_alphabetcharacters(aencseq);
    const GtUchar wildcardshow = gt_encseq_alphabetwildcardshow(aencseq);
    GtUchar *alignment_show_buffer
      = arguments->show_alignment ? gt_alignment_buffer_new(alignmentwidth)
                                  : NULL;
    GtLinspaceManagement *linspace_spacemanager
      = gt_linspaceManagement_new();
    GtScoreHandler *linspace_scorehandler = gt_scorehandler_new(0,1,0,1);
    GtQuerymatch *querymatchptr;

    /* Polishing information is only set up if polishing is not relaxed. */
    if (!arguments->relax_polish)
    {
      const double matchscore_bias
        = gt_seedextend_match_iterator_bias_parameters(semi)
            ? gt_greedy_dna_sequence_bias_get(aencseq)
            : GT_DEFAULT_MATCHSCORE_BIAS;

      pol_info = polishing_info_new_with_bias(
                   gt_seedextend_match_iterator_errorpercentage(semi),
                   matchscore_bias,
                   gt_seedextend_match_iterator_history_size(semi));
    }
    if (arguments->seed_display)
    {
      gt_seedextend_match_iterator_seed_display_set(semi);
    }
    if (arguments->show_alignment || arguments->showeoplist)
    {
      gt_seedextend_match_iterator_querymatchoutoptions_set(
                   semi,
                   true,
                   arguments->showeoplist,
                   alignmentwidth,
                   !arguments->relax_polish,
                   arguments->seed_display);
    }
    if (arguments->seed_extend)
    {
      greedyextendmatchinfo
        = gt_greedy_extend_matchinfo_new(
                   70,
                   GT_MAX_ALI_LEN_DIFF,
                   gt_seedextend_match_iterator_history_size(semi),
                   GT_MIN_PERC_MAT_HISTORY,
                   0, /* userdefinedleastlength */
                   GT_EXTEND_CHAR_ACCESS_ANY,
                   100,
                   pol_info);
    }
    if (pol_info != NULL)
    {
      gt_alignment_polished_ends(alignment, pol_info, false);
    }
    processinfo_and_querymatchspaceptr.processinfo = greedyextendmatchinfo;
    if (arguments->sortmatches)
    {
      (void) gt_seedextend_match_iterator_all_sorted(semi, true);
    }

    /* Process matches until the iterator is exhausted or an error occurs. */
    while ((querymatchptr = gt_seedextend_match_iterator_next(semi)) != NULL)
    {
      if (!gt_seedextend_match_iterator_has_seedline(semi))
      {
        /* No seed available: recompute the alignment from the match
           coordinates. */
        gt_show_seed_extend_plain(&seqpairbuf,
                                  linspace_spacemanager,
                                  linspace_scorehandler,
                                  alignment,
                                  alignment_show_buffer,
                                  alignmentwidth,
                                  arguments->showeoplist,
                                  characters,
                                  wildcardshow,
                                  aencseq,
                                  bencseq,
                                  querymatchptr);
        continue;
      }
      if (!arguments->seed_extend)
      {
        const GtUword query_totallength
          = gt_encseq_seqlength(bencseq,
                                gt_querymatch_queryseqnum(querymatchptr));

        gt_show_seed_extend_encseq(querymatchptr, aencseq, bencseq,
                                   query_totallength);
        continue;
      }
      /* Re-running the extension is only handled for self matches, i.e.
         when both iterator sequences are the same object. */
      if (aencseq == bencseq)
      {
        const GtUword
          seedlen = gt_seedextend_match_iterator_seedlen(semi),
          seedpos1 = gt_seedextend_match_iterator_seedpos1(semi),
          seedpos2 = gt_seedextend_match_iterator_seedpos2(semi);

        processinfo_and_querymatchspaceptr.querymatchspaceptr
          = querymatchptr;
        had_err = gt_greedy_extend_selfmatch_with_output(
                    &processinfo_and_querymatchspaceptr,
                    aencseq,
                    seedlen,
                    seedpos1,
                    seedpos2,
                    err);
        if (had_err)
        {
          break;
        }
      } else
      {
        gt_assert(false);
      }
    }

    /* Release everything acquired in this branch. */
    polishing_info_delete(pol_info);
    gt_greedy_extend_matchinfo_delete(greedyextendmatchinfo);
    gt_free(alignment_show_buffer);
    gt_scorehandler_delete(linspace_scorehandler);
    gt_linspaceManagement_delete(linspace_spacemanager);
    gt_free(seqpairbuf.a_sequence);
    gt_free(seqpairbuf.b_sequence);
    gt_alignment_delete(alignment);
  }
  /* Called on the failure path as well, i.e. possibly with NULL. */
  gt_seedextend_match_iterator_delete(semi);
  return had_err;
}
static void gt_show_seed_extend_plain(GtSequencepairbuffer *seqpairbuf, GtLinspaceManagement *linspace_spacemanager, GtScoreHandler *linspace_scorehandler, GtAlignment *alignment, GtUchar *alignment_show_buffer, GtUword alignmentwidth, bool showeoplist, const GtUchar *characters, GtUchar wildcardshow, const GtEncseq *aencseq, const GtEncseq *bencseq, const GtQuerymatch *querymatchptr) { GtUword edist; GtReadmode query_readmode = gt_querymatch_query_readmode(querymatchptr); const GtUword distance = gt_querymatch_distance(querymatchptr), dblen = gt_querymatch_dblen(querymatchptr), queryseqnum = gt_querymatch_queryseqnum(querymatchptr), querystart_fwdstrand = gt_querymatch_querystart_fwdstrand(querymatchptr), querylen = gt_querymatch_querylen(querymatchptr); const GtUword apos_ab = gt_querymatch_dbstart(querymatchptr); const GtUword bpos_ab = gt_encseq_seqstartpos(bencseq, queryseqnum) + querystart_fwdstrand; gt_querymatch_coordinates_out(querymatchptr); if (dblen >= seqpairbuf->a_allocated) { seqpairbuf->a_sequence = gt_realloc(seqpairbuf->a_sequence, sizeof *seqpairbuf->a_sequence * dblen); seqpairbuf->a_allocated = dblen; } if (querylen >= seqpairbuf->b_allocated) { seqpairbuf->b_sequence = gt_realloc(seqpairbuf->b_sequence, sizeof *seqpairbuf->b_sequence * querylen); seqpairbuf->b_allocated = querylen; } gt_encseq_extract_encoded(aencseq, seqpairbuf->a_sequence, apos_ab, apos_ab + dblen - 1); gt_encseq_extract_encoded(bencseq, seqpairbuf->b_sequence, bpos_ab, bpos_ab + querylen - 1); if (query_readmode != GT_READMODE_FORWARD) { gt_assert(query_readmode == GT_READMODE_REVCOMPL); gt_inplace_reverse_complement(seqpairbuf->b_sequence,querylen); } edist = gt_computelinearspace_generic(linspace_spacemanager, linspace_scorehandler, alignment, seqpairbuf->a_sequence, 0, dblen, seqpairbuf->b_sequence, 0, querylen); if (edist < distance) { printf("# edist=" GT_WU " (smaller by " GT_WU ")\n",edist,distance - edist); } gt_assert(edist <= distance); if (alignmentwidth > 0) { 
gt_alignment_show_generic(alignment_show_buffer, false, alignment, stdout, alignmentwidth, characters, wildcardshow); } if (showeoplist && distance > 0) { gt_alignment_show_multieop_list(alignment, stdout); } if (alignmentwidth > 0 || showeoplist) { gt_alignment_reset(alignment); } }