/* Compute a global alignment of useq[ustart..] (length ulen) and
 * vseq[vstart..] (length vlen) in square space using constant cost values.
 * The costs are wrapped in a temporary score handler (gap opening 0) which
 * is released again before returning. When used in a linear space context a
 * spacemanager has to be created beforehand; in any other case it can be
 * NULL. The result of alignment_in_square_space_generic is returned. */
GtUword alignment_in_square_space(GtLinspaceManagement *spacemanager,
                                  GtAlignment *align,
                                  const GtUchar *useq,
                                  GtUword ustart,
                                  GtUword ulen,
                                  const GtUchar *vseq,
                                  GtUword vstart,
                                  GtUword vlen,
                                  GtUword matchcost,
                                  GtUword mismatchcost,
                                  GtUword gapcost)
{
  GtScoreHandler *costs;
  GtUword result;

  gt_assert(align);
  costs = gt_scorehandler_new(matchcost, mismatchcost, 0, gapcost);
  result = alignment_in_square_space_generic(spacemanager, align,
                                             useq, ustart, ulen,
                                             vseq, vstart, vlen,
                                             costs);
  gt_scorehandler_delete(costs);
  return result;
}
/* Compute a local alignment of useq[ustart..] (length ulen) and
 * vseq[vstart..] (length vlen) in square space using constant score values.
 * The scores are wrapped in a temporary score handler (gap opening 0) which
 * is released again before returning. When used in a linear space context a
 * spacemanager has to be created beforehand; in any other case it can be
 * NULL. The result of alignment_in_square_space_local_generic is returned. */
GtWord alignment_in_square_space_local(GtLinspaceManagement *spacemanager,
                                       GtAlignment *align,
                                       const GtUchar *useq,
                                       GtUword ustart,
                                       GtUword ulen,
                                       const GtUchar *vseq,
                                       GtUword vstart,
                                       GtUword vlen,
                                       GtWord matchscore,
                                       GtWord mismatchscore,
                                       GtWord gapscore)
{
  GtScoreHandler *scores;
  GtWord result;

  gt_assert(align);
  scores = gt_scorehandler_new(matchscore, mismatchscore, 0, gapscore);
  result = alignment_in_square_space_local_generic(spacemanager, align,
                                                   useq, ustart, ulen,
                                                   vseq, vstart, vlen,
                                                   scores);
  gt_scorehandler_delete(scores);
  return result;
}
GtScoreHandler *gt_scorehandler2costhandler(const GtScoreHandler *scorehandler) { GtScoreHandler *costhandler; gt_assert(scorehandler != NULL); if (scorehandler->scorematrix == NULL) { GtWord matchscore, mismatchscore, gap_extension, gap_opening, maxscore = MAX(MAX(GT_DIV2(scorehandler->matchscore+1), GT_DIV2(scorehandler->mismatchscore+1)), MAX(1 + scorehandler->gap_extension,0)); matchscore = 2 * maxscore - scorehandler->matchscore; mismatchscore = 2 * maxscore - scorehandler->mismatchscore; gap_extension = maxscore - scorehandler->gap_extension; gap_opening = -scorehandler->gap_opening; costhandler = gt_scorehandler_new(matchscore, mismatchscore, gap_opening, gap_extension); if (!scorehandler->mappedsequence) { gt_scorehandler_plain(costhandler); } } else { int maxscore; GtWord gap_extension, gap_opening; unsigned int i, j, dim = gt_score_matrix_get_dimension(scorehandler->scorematrix); GtScoreMatrix *costmatrix = gt_score_matrix_clone_empty(scorehandler->scorematrix); for (maxscore = 0, i = 0; i < dim; i++) { for (j = 0; j < dim; j++) { int val = gt_score_matrix_get_score(scorehandler->scorematrix, i, j); if (val > maxscore) { maxscore = val; } } } maxscore = MAX(GT_DIV2(maxscore+1), 1 + scorehandler->gap_extension); for (i = 0; i < dim; i++) { for (j = 0; j < dim; j++) { /* translate */ int score = gt_score_matrix_get_score(scorehandler->scorematrix,i,j); gt_score_matrix_set_score(costmatrix, i, j, 2 * maxscore - score); } } gap_extension = maxscore - scorehandler->gap_extension; gap_opening = -scorehandler->gap_opening; costhandler = gt_scorehandler_new( 0,0, gap_opening, gap_extension); gt_scorehandler_add_scorematrix(costhandler,costmatrix); } return costhandler; }
/* Build a score handler from the score/cost values given in the arguments.
 *
 * If arguments->linearcosts is non-empty its values are used and the gap
 * opening value is 0; otherwise the values are taken from
 * arguments->affinecosts, which additionally supply a gap opening value.
 * The values are consumed in this order:
 *   - protein:     <score matrix file>
 *     non-protein: <match> <mismatch>   (parsed with arguments->global)
 *   - affine only: <gap opening>
 *   - always:      <gap extension>
 *
 * Returns a new score handler (with the score matrix attached in the protein
 * case) or NULL if reading the matrix or parsing a value failed. A score
 * matrix that was already read is released again when a later value fails
 * to parse, so nothing is leaked on the error path. */
static GtScoreHandler *gt_arguments2scorehandler(
                                        const GtLinspaceArguments *arguments,
                                        GtError *err)
{
  /* initialised so that no indeterminate value is ever passed on */
  GtWord matchscore = 0, mismatchscore = 0, gap_open = 0, gap_extension = 0;
  GtScoreHandler *scorehandler = NULL;
  GtScoreMatrix *scorematrix = NULL;
  const GtStrArray *costs;
  GtUword wordindex = 0;
  bool linear;
  int had_err = 0;

  gt_error_check(err);
  linear = gt_str_array_size(arguments->linearcosts) > 0 ? true : false;
  costs = linear ? arguments->linearcosts : arguments->affinecosts;

  if (arguments->protein)
  {
    /* replacement scores come from a matrix file; the constant
       match/mismatch scores stay 0 */
    scorematrix
      = gt_score_matrix_new_read_protein(gt_str_array_get(costs, wordindex++),
                                         err);
    if (scorematrix == NULL)
    {
      had_err = -1;
    }
  } else
  {
    had_err = gt_parse_score_value(__LINE__, &matchscore,
                                   gt_str_array_get(costs, wordindex++),
                                   arguments->global, err);
    if (!had_err)
    {
      had_err = gt_parse_score_value(__LINE__, &mismatchscore,
                                     gt_str_array_get(costs, wordindex++),
                                     arguments->global, err);
    }
  }
  if (!had_err && !linear)
  {
    had_err = gt_parse_score_value(__LINE__, &gap_open,
                                   gt_str_array_get(costs, wordindex++),
                                   false, err);
  }
  if (!had_err)
  {
    had_err = gt_parse_score_value(__LINE__, &gap_extension,
                                   gt_str_array_get(costs, wordindex),
                                   false, err);
  }

  if (!had_err)
  {
    scorehandler = gt_scorehandler_new(matchscore, mismatchscore,
                                       gap_open, gap_extension);
    if (scorematrix != NULL)
    {
      gt_scorehandler_add_scorematrix(scorehandler, scorematrix);
    }
  } else if (scorematrix != NULL)
  {
    /* the matrix was never handed to a score handler: release it here */
    gt_score_matrix_delete(scorematrix);
  }
  return scorehandler;
}
/* Consistency check for local alignments with the fixed scores
 * match = 2, mismatch = -2, gap = -1.
 *
 * The following values are computed for useq (length ulen) and vseq
 * (length vlen) and must all agree:
 *   score1: result of gt_linearalign_compute_local_generic
 *   score2: gt_alignment_eval_with_score on the alignment of score1
 *   score3: result of gt_squarealign_calculate_local
 *   score4: gt_alignment_eval_with_score on the alignment of score3
 *
 * Neither sequence may contain the symbol LINEAR_EDIST_GAP. On any
 * violation a message is written to stderr and the program exits with
 * GT_EXIT_PROGRAMMING_ERROR. The parameter forward is unused. */
void gt_linearalign_check_local(GT_UNUSED bool forward,
                                const GtUchar *useq, GtUword ulen,
                                const GtUchar *vseq, GtUword vlen)
{
  GtAlignment *align;
  GtWord score1, score2, score3, score4,
         matchscore = 2, mismatchscore = -2, gapscore = -1;
  GtLinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* linear space computation and evaluation of the resulting alignment */
  scorehandler = gt_scorehandler_new(matchscore, mismatchscore, 0, gapscore);
  gt_scorehandler_plain(scorehandler);
  spacemanager = gt_linspace_management_new();
  align = gt_alignment_new();
  score1 = gt_linearalign_compute_local_generic(spacemanager, scorehandler,
                                                align, useq, 0, ulen,
                                                vseq, 0, vlen);
  score2 = gt_alignment_eval_with_score(align, true, matchscore,
                                        mismatchscore, gapscore);
  gt_linspace_management_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  if (score1 != score2)
  {
    /* the message names the function that was actually called */
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
                   " = gt_alignment_eval_with_score\n", score1, score2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* square space computation, reusing the same alignment object */
  gt_alignment_reset(align);
  score3 = gt_squarealign_calculate_local(NULL, align, useq, 0, ulen,
                                          vseq, 0, vlen,
                                          matchscore, mismatchscore, gapscore);
  if (score1 != score3)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
                   " = gt_squarealign_calculate_local\n", score1, score3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  score4 = gt_alignment_eval_with_score(align, true, matchscore,
                                        mismatchscore, gapscore);
  if (score3 != score4)
  {
    /* the message names the function that was actually called */
    fprintf(stderr,"gt_squarealign_calculate_local = "GT_WD" != "GT_WD
                   " = gt_alignment_eval_with_score\n", score3, score4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  gt_alignment_delete(align);
}
/* Consistency check for global alignments with unit costs
 * (match = 0, mismatch = 1, gap = 1) on a plain, downcasing score handler.
 *
 * Four values are computed for useq (length ulen) and vseq (length vlen)
 * and compared pairwise in this order:
 *   gt_calc_linearalign, gt_squarealign_global_distance_only,
 *   gt_alignment_eval_with_score (on the alignment of the first call) and
 *   gt_calc_linearedist.
 *
 * Neither sequence may contain the symbol LINEAR_EDIST_GAP. On any
 * violation a message is written to stderr and the program exits with
 * GT_EXIT_PROGRAMMING_ERROR. The parameter forward is unused. */
void gt_linearalign_check(GT_UNUSED bool forward,
                          const GtUchar *useq, GtUword ulen,
                          const GtUchar *vseq, GtUword vlen)
{
  GtUword matchcost = 0, mismatchcost = 1, gapcost = 1;
  GtUword dist_linear, dist_square, dist_eval, dist_edist;
  GtScoreHandler *unit_costs;
  GtLinspaceManagement *space;
  GtAlignment *alignment;
  const bool downcase = true;

  /* reject input containing the gap symbol */
  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  unit_costs = gt_scorehandler_new(matchcost, mismatchcost, 0, gapcost);
  gt_scorehandler_plain(unit_costs);
  gt_scorehandler_downcase(unit_costs);
  space = gt_linspace_management_new();
  alignment = gt_alignment_new_with_seqs(useq, ulen, vseq, vlen);

  /* linear space alignment versus square space distance */
  dist_linear = gt_calc_linearalign(space, unit_costs, alignment,
                                    useq, 0, ulen,
                                    vseq, 0, vlen);
  dist_square = gt_squarealign_global_distance_only(useq, 0, ulen,
                                                    vseq, 0, vlen,
                                                    unit_costs);
  if (dist_linear != dist_square)
  {
    fprintf(stderr,"gt_calc_linearalign = "GT_WU" != "GT_WU
                   " = gt_squarealign_global_distance_only\n",
            dist_linear, dist_square);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* square space distance versus evaluation of the alignment */
  dist_eval = gt_alignment_eval_with_score(alignment, true, matchcost,
                                           mismatchcost, gapcost);
  if (dist_square != dist_eval)
  {
    fprintf(stderr,"gt_squarealign_global_distance_only = "GT_WU" != "GT_WU
                   " = gt_alignment_eval_with_score\n",
            dist_square, dist_eval);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* evaluation of the alignment versus linear space edit distance */
  dist_edist = gt_calc_linearedist(downcase, useq, ulen, vseq, vlen);
  if (dist_eval != dist_edist)
  {
    fprintf(stderr,"gt_alignment_eval_with_score = "GT_WU" != "GT_WU
                   " = gt_calc_linearedist\n",
            dist_eval, dist_edist);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_linspace_management_delete(space);
  gt_scorehandler_delete(unit_costs);
  gt_alignment_delete(alignment);
}
/* Tool runner: iterate over the matches delivered by a seed extension match
 * iterator opened on arguments->matchfilename and process each of them.
 *
 * For each match:
 *   - no seed line: the match is shown via gt_show_seed_extend_plain;
 *   - seed line, seed_extend not set: shown via gt_show_seed_extend_encseq;
 *   - seed line, seed_extend set: the seed is extended via
 *     gt_greedy_extend_selfmatch_with_output, which requires both encoded
 *     sequences to be the same object (asserted otherwise).
 *
 * Returns 0 on success and a non-zero value if the iterator could not be
 * created or an extension reported an error. argc, argv and parsed_args are
 * unused. */
static int gt_show_seedext_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  GtShowSeedextArguments *arguments = tool_arguments;
  GtSeedextendMatchIterator *semi;
  GtUword alignmentwidth;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  /* a width of 0 is used when no alignment is to be shown */
  alignmentwidth = arguments->show_alignment ? 70 : 0;
  semi = gt_seedextend_match_iterator_new(arguments->matchfilename,err);
  if (semi != NULL)
  {
    const GtEncseq *aencseq = gt_seedextend_match_iterator_aencseq(semi),
                   *bencseq = gt_seedextend_match_iterator_bencseq(semi);
    GtAlignment *alignment = gt_alignment_new();
    Polishing_info *polishing = NULL;
    GtSequencepairbuffer seqpair = {NULL,NULL,0,0};
    /* the following two are used if seed_extend is set */
    GtGreedyextendmatchinfo *greedy_info = NULL;
    GtProcessinfo_and_querymatchspaceptr extend_ctx;
    const GtUchar *characters = gt_encseq_alphabetcharacters(aencseq);
    const GtUchar wildcardshow = gt_encseq_alphabetwildcardshow(aencseq);
    GtUchar *show_buffer = arguments->show_alignment
                             ? gt_alignment_buffer_new(alignmentwidth)
                             : NULL;
    GtLinspaceManagement *spacemanager = gt_linspaceManagement_new();
    GtScoreHandler *unit_costs = gt_scorehandler_new(0,1,0,1);
    GtQuerymatch *match;

    /* polishing information is only needed for non-relaxed polishing */
    if (!arguments->relax_polish)
    {
      double matchscore_bias = GT_DEFAULT_MATCHSCORE_BIAS;

      if (gt_seedextend_match_iterator_bias_parameters(semi))
      {
        matchscore_bias = gt_greedy_dna_sequence_bias_get(aencseq);
      }
      polishing = polishing_info_new_with_bias(
                      gt_seedextend_match_iterator_errorpercentage(semi),
                      matchscore_bias,
                      gt_seedextend_match_iterator_history_size(semi));
    }
    if (arguments->seed_display)
    {
      gt_seedextend_match_iterator_seed_display_set(semi);
    }
    if (arguments->show_alignment || arguments->showeoplist)
    {
      gt_seedextend_match_iterator_querymatchoutoptions_set(
                      semi,
                      true,
                      arguments->showeoplist,
                      alignmentwidth,
                      !arguments->relax_polish,
                      arguments->seed_display);
    }
    if (arguments->seed_extend)
    {
      greedy_info = gt_greedy_extend_matchinfo_new(
                      70,
                      GT_MAX_ALI_LEN_DIFF,
                      gt_seedextend_match_iterator_history_size(semi),
                      GT_MIN_PERC_MAT_HISTORY,
                      0, /* userdefinedleastlength */
                      GT_EXTEND_CHAR_ACCESS_ANY,
                      100,
                      polishing);
    }
    if (polishing != NULL)
    {
      gt_alignment_polished_ends(alignment,polishing,false);
    }
    extend_ctx.processinfo = greedy_info;
    if (arguments->sortmatches)
    {
      (void) gt_seedextend_match_iterator_all_sorted(semi,true);
    }

    /* process the matches one by one until the iterator is exhausted */
    while ((match = gt_seedextend_match_iterator_next(semi)) != NULL)
    {
      if (!gt_seedextend_match_iterator_has_seedline(semi))
      {
        gt_show_seed_extend_plain(&seqpair,
                                  spacemanager,
                                  unit_costs,
                                  alignment,
                                  show_buffer,
                                  alignmentwidth,
                                  arguments->showeoplist,
                                  characters,
                                  wildcardshow,
                                  aencseq,
                                  bencseq,
                                  match);
        continue;
      }
      if (!arguments->seed_extend)
      {
        const GtUword query_totallength
          = gt_encseq_seqlength(bencseq, gt_querymatch_queryseqnum(match));

        gt_show_seed_extend_encseq(match, aencseq, bencseq,
                                   query_totallength);
        continue;
      }
      if (aencseq != bencseq)
      {
        /* extension is only implemented for the self match case */
        gt_assert(false);
        continue;
      }
      {
        const GtUword
          seedlen = gt_seedextend_match_iterator_seedlen(semi),
          seedpos1 = gt_seedextend_match_iterator_seedpos1(semi),
          seedpos2 = gt_seedextend_match_iterator_seedpos2(semi);

        extend_ctx.querymatchspaceptr = match;
        had_err = gt_greedy_extend_selfmatch_with_output(&extend_ctx,
                                                         aencseq,
                                                         seedlen,
                                                         seedpos1,
                                                         seedpos2,
                                                         err);
        if (had_err)
        {
          break;
        }
      }
    }

    polishing_info_delete(polishing);
    gt_greedy_extend_matchinfo_delete(greedy_info);
    gt_free(show_buffer);
    gt_scorehandler_delete(unit_costs);
    gt_linspaceManagement_delete(spacemanager);
    gt_free(seqpair.a_sequence);
    gt_free(seqpair.b_sequence);
    gt_alignment_delete(alignment);
  } else
  {
    had_err = -1;
  }
  gt_seedextend_match_iterator_delete(semi);
  return had_err;
}