/* Global alignment with linear gap costs, computed in linear space and
 * parameterised by constant cost values.
 *
 * A temporary DNA score handler is built from matchcost, mismatchcost and
 * gapcost (the third handler parameter is passed as 0), handed to
 * gt_computelinearspace_generic() together with both sequence ranges, and
 * released again before returning.
 *
 * spacemanager and align must not be NULL.
 * Returns the value delivered by gt_computelinearspace_generic(). */
GtUword gt_computelinearspace(LinspaceManagement *spacemanager,
                              GtAlignment *align,
                              const GtUchar *useq,
                              GtUword ustart,
                              GtUword ulen,
                              const GtUchar *vseq,
                              GtUword vstart,
                              GtUword vlen,
                              GtUword matchcost,
                              GtUword mismatchcost,
                              GtUword gapcost)
{
  GtScoreHandler *costs;
  GtUword result;

  gt_assert(spacemanager && align);

  costs = gt_scorehandler_new_DNA(matchcost, mismatchcost, 0, gapcost);
  result = gt_computelinearspace_generic(spacemanager, costs, align,
                                         useq, ustart, ulen,
                                         vseq, vstart, vlen);
  gt_scorehandler_delete(costs);

  return result;
}
/* Local alignment with linear gap scores and constant score values.
 *
 * Builds a temporary DNA score handler from matchscore, mismatchscore and
 * gapscore (third handler parameter passed as 0), delegates the work to
 * gt_computelinearspace_local_generic() and frees the handler afterwards.
 *
 * align and spacemanager must not be NULL.
 * Returns the value delivered by gt_computelinearspace_local_generic(). */
GtWord gt_computelinearspace_local(LinspaceManagement *spacemanager,
                                   GtAlignment *align,
                                   const GtUchar *useq,
                                   GtUword ustart,
                                   GtUword ulen,
                                   const GtUchar *vseq,
                                   GtUword vstart,
                                   GtUword vlen,
                                   GtWord matchscore,
                                   GtWord mismatchscore,
                                   GtWord gapscore)
{
  GtScoreHandler *scores;
  GtWord result;

  gt_assert(align && spacemanager);

  scores = gt_scorehandler_new_DNA(matchscore, mismatchscore, 0, gapscore);
  result = gt_computelinearspace_local_generic(spacemanager, scores, align,
                                               useq, ustart, ulen,
                                               vseq, vstart, vlen);
  gt_scorehandler_delete(scores);

  return result;
}
/* Create a global alignment in square space using constant cost values.
 *
 * When this is used in a linear-space context the caller has to create a
 * spacemanager beforehand; in every other case spacemanager may be NULL.
 *
 * A temporary score handler is built from matchcost, mismatchcost and
 * gapcost (third handler parameter passed as 0), the computation is
 * delegated to alignment_in_square_space_generic() and the handler is
 * deleted afterwards.
 *
 * align must not be NULL.
 * Returns the value delivered by alignment_in_square_space_generic(). */
GtUword alignment_in_square_space(GtLinspaceManagement *spacemanager,
                                  GtAlignment *align,
                                  const GtUchar *useq,
                                  GtUword ustart,
                                  GtUword ulen,
                                  const GtUchar *vseq,
                                  GtUword vstart,
                                  GtUword vlen,
                                  GtUword matchcost,
                                  GtUword mismatchcost,
                                  GtUword gapcost)
{
  GtScoreHandler *costs;
  GtUword result;

  gt_assert(align);

  costs = gt_scorehandler_new(matchcost, mismatchcost, 0, gapcost);
  result = alignment_in_square_space_generic(spacemanager, align,
                                             useq, ustart, ulen,
                                             vseq, vstart, vlen,
                                             costs);
  gt_scorehandler_delete(costs);

  return result;
}
/* Create a local alignment in square space using constant score values.
 *
 * When this is used in a linear-space context the caller has to create a
 * spacemanager beforehand; in every other case spacemanager may be NULL.
 *
 * A temporary score handler is built from matchscore, mismatchscore and
 * gapscore (third handler parameter passed as 0), the computation is
 * delegated to alignment_in_square_space_local_generic() and the handler
 * is deleted afterwards.
 *
 * align must not be NULL.
 * Returns the value delivered by
 * alignment_in_square_space_local_generic(). */
GtWord alignment_in_square_space_local(GtLinspaceManagement *spacemanager,
                                       GtAlignment *align,
                                       const GtUchar *useq,
                                       GtUword ustart,
                                       GtUword ulen,
                                       const GtUchar *vseq,
                                       GtUword vstart,
                                       GtUword vlen,
                                       GtWord matchscore,
                                       GtWord mismatchscore,
                                       GtWord gapscore)
{
  GtScoreHandler *scores;
  GtWord result;

  gt_assert(align);

  scores = gt_scorehandler_new(matchscore, mismatchscore, 0, gapscore);
  result = alignment_in_square_space_local_generic(spacemanager, align,
                                                   useq, ustart, ulen,
                                                   vseq, vstart, vlen,
                                                   scores);
  gt_scorehandler_delete(scores);

  return result;
}
/* Consistency check for local alignment with fixed scores
 * (match 2, mismatch -2, gap -1).
 *
 * The following values are compared, and the program terminates with
 * GT_EXIT_PROGRAMMING_ERROR as soon as two of them differ:
 *   score1: gt_computelinearspace_local_generic()
 *   score2: gt_alignment_eval_with_score() on the alignment of score1
 *   score3: alignment_in_square_space_local()
 *   score4: gt_alignment_eval_with_score() on the alignment of score3
 *
 * The program also terminates if useq or vseq contains LINEAR_EDIST_GAP.
 * If check_dna_sequence() returns NULL for either sequence, the function
 * returns without performing any comparison.
 *
 * forward is unused. */
void gt_checklinearspace_local(GT_UNUSED bool forward,
                               const GtUchar *useq,
                               GtUword ulen,
                               const GtUchar *vseq,
                               GtUword vlen)
{
  GtAlignment *align;
  GtWord score1, score2, score3, score4,
         matchscore = 2, mismatchscore = -2, gapscore = -1;
  GtUchar *low_useq, *low_vseq;
  LinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;
  GtAlphabet *alphabet;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  scorehandler = gt_scorehandler_new_DNA(matchscore, mismatchscore,
                                         0, gapscore);
  alphabet = gt_scorehandler_get_alphabet(scorehandler);
  low_useq = check_dna_sequence(useq, ulen, alphabet);
  low_vseq = check_dna_sequence(vseq, vlen, alphabet);
  if (low_useq == NULL || low_vseq == NULL)
  {
    /* at most one of the two copies exists here; release it.
       Plain if-statements are used because a conditional expression with
       one void and one int operand is not valid ISO C. */
    if (low_useq != NULL)
    {
      gt_free(low_useq);
    }
    if (low_vseq != NULL)
    {
      gt_free(low_vseq);
    }
    gt_scorehandler_delete(scorehandler);
    return;
  }

  /* NOTE(review): the alignments below are computed on useq/vseq; the
     converted copies low_useq/low_vseq are only tested for NULL and freed
     in this function -- confirm that this is intended. */
  spacemanager = gt_linspaceManagement_new();
  align = gt_alignment_new();
  score1 = gt_computelinearspace_local_generic(spacemanager, scorehandler,
                                               align, useq, 0, ulen,
                                               vseq, 0, vlen);
  score2 = gt_alignment_eval_with_score(align, matchscore,
                                        mismatchscore, gapscore);
  gt_linspaceManagement_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  if (score1 != score2)
  {
    fprintf(stderr,"gt_computelinearspace_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score1, score2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* reuse the same alignment object for the square-space computation */
  gt_alignment_reset(align);
  score3 = alignment_in_square_space_local(NULL, align, useq, 0, ulen,
                                           vseq, 0, vlen, matchscore,
                                           mismatchscore, gapscore);
  if (score1 != score3)
  {
    fprintf(stderr,"gt_computelinearspace_local = "GT_WD" != "GT_WD
            " = alignment_in_square_space_local\n", score1, score3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  score4 = gt_alignment_eval_with_score(align, matchscore,
                                        mismatchscore, gapscore);
  if (score3 != score4)
  {
    fprintf(stderr,"alignment_in_square_space_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score3, score4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_alignment_delete(align);
  gt_free(low_useq);
  gt_free(low_vseq);
}
/* Consistency check for global alignment with unit costs
 * (match 0, mismatch 1, gap 1).
 *
 * The following values are compared, and the program terminates with
 * GT_EXIT_PROGRAMMING_ERROR as soon as two of them differ:
 *   edist1: gt_calc_linearalign()
 *   edist2: distance_only_global_alignment()
 *   edist3: gt_alignment_eval_with_score() on the alignment of edist1
 *   edist4: gt_calc_linearedist()
 *
 * The program also terminates if useq or vseq contains LINEAR_EDIST_GAP.
 * If check_dna_sequence() returns NULL for either sequence, the function
 * returns without performing any comparison.
 *
 * forward is unused. */
void gt_checklinearspace(GT_UNUSED bool forward,
                         const GtUchar *useq,
                         GtUword ulen,
                         const GtUchar *vseq,
                         GtUword vlen)
{
  GtAlignment *align;
  GtUword edist1, edist2, edist3, edist4,
          matchcost = 0, mismatchcost = 1, gapcost = 1;
  LinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;
  GtAlphabet *alphabet;
  GtUchar *low_useq, *low_vseq;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  scorehandler = gt_scorehandler_new_DNA(matchcost, mismatchcost,
                                         0, gapcost);
  alphabet = gt_scorehandler_get_alphabet(scorehandler);
  low_useq = check_dna_sequence(useq, ulen, alphabet);
  low_vseq = check_dna_sequence(vseq, vlen, alphabet);
  if (low_useq == NULL || low_vseq == NULL)
  {
    /* at most one of the two copies exists here; release it.
       Plain if-statements are used because a conditional expression with
       one void and one int operand is not valid ISO C. */
    if (low_useq != NULL)
    {
      gt_free(low_useq);
    }
    if (low_vseq != NULL)
    {
      gt_free(low_vseq);
    }
    gt_scorehandler_delete(scorehandler);
    return;
  }

  spacemanager = gt_linspaceManagement_new();
  align = gt_alignment_new_with_seqs(low_useq, ulen, low_vseq, vlen);
  edist1 = gt_calc_linearalign(spacemanager, scorehandler, align,
                               low_useq, 0, ulen, low_vseq, 0, vlen);
  /* NOTE(review): edist2 is computed on useq/vseq whereas edist1 and
     edist4 use the converted copies low_useq/low_vseq -- confirm that
     this asymmetry is intended. */
  edist2 = distance_only_global_alignment(useq, 0, ulen, vseq, 0, vlen,
                                          scorehandler);
  if (edist1 != edist2)
  {
    fprintf(stderr,"gt_calc_linearalign = "GT_WU" != "GT_WU
            " = distance_only_global_alignment\n", edist1,edist2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  edist3 = gt_alignment_eval_with_score(align, matchcost,
                                        mismatchcost, gapcost);
  if (edist2 != edist3)
  {
    fprintf(stderr,"distance_only_global_alignment = "GT_WU" != "GT_WU
            " = gt_alignment_eval_with_score\n", edist2,edist3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  edist4 = gt_calc_linearedist(low_useq, ulen, low_vseq, vlen);
  if (edist3 != edist4)
  {
    fprintf(stderr,"gt_alignment_eval_with_score = "GT_WU" != "GT_WU
            " = gt_calc_linearedist\n", edist3, edist4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_free(low_useq);
  gt_free(low_vseq);
  gt_linspaceManagement_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  gt_alignment_delete(align);
}
/* Runner of the linear-space alignment tool.
 *
 * Steps, in order:
 *   1. fill two sequence tables, either from the strings given in the
 *      arguments or from two files read via get_fastasequences();
 *   2. create a DNA or protein alphabet and encode both tables with it;
 *   3. derive the score handler from the arguments; for global protein
 *      alignments without cost matrix it is replaced by the result of
 *      gt_scorehandler2costhandler();
 *   4. optionally parse the two diagonal band values;
 *   5. call gt_all_against_all_alignment_check() on both tables;
 *   6. with the spacetime option, print space peak and overall time.
 *
 * All objects created here are deleted before returning.
 * Returns 0 on success and a non-zero error code otherwise. */
static int gt_linspace_align_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtLinspaceArguments *arguments = tool_arguments;
  GtSequenceTable *first_table, *second_table;
  GtLinspaceManagement *spacemanager;
  GtAlignment *align;
  GtAlphabet *alphabet = NULL;
  GtScoreHandler *scorehandler = NULL;
  GtTimer *timer = NULL;
  GtWord band_left = 0, band_right = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  first_table = gt_sequence_table_new();
  second_table = gt_sequence_table_new();
  align = gt_alignment_new();
  spacemanager = gt_linspace_management_new();
  gt_linspace_management_set_TSfactor(spacemanager,
                                      arguments->timesquarefactor);

  /* obtain the sequences: strings take precedence over files */
  if (gt_str_array_size(arguments->strings) > 0)
  {
    get_onesequence(first_table, arguments->strings, 0);
    first_table->size++;
    get_onesequence(second_table, arguments->strings, 1);
    second_table->size++;
  }
  else if (gt_str_array_size(arguments->files) > 0)
  {
    had_err = get_fastasequences(first_table,
                                 gt_str_array_get_str(arguments->files, 0),
                                 err);
    if (!had_err)
    {
      had_err = get_fastasequences(second_table,
                                   gt_str_array_get_str(arguments->files, 1),
                                   err);
    }
  }

  /* the alphabet is either DNA or protein */
  if (!arguments->dna)
  {
    gt_assert(arguments->protein);
    alphabet = gt_alphabet_new_protein();
  }
  else
  {
    alphabet = gt_alphabet_new_dna();
  }
  gt_encode_sequence_table(alphabet, first_table);
  gt_encode_sequence_table(alphabet, second_table);

  if (!had_err)
  {
    scorehandler = gt_arguments2scorehandler(arguments, err);
    if (scorehandler == NULL)
    {
      had_err = -1;
    }
    else if (arguments->global && arguments->protein
             && !arguments->has_costmatrix)
    {
      /* swap the score handler for the derived cost handler */
      GtScoreHandler *replaced = scorehandler;

      scorehandler = gt_scorehandler2costhandler(replaced);
      gt_scorehandler_delete(replaced);
    }
  }

  /* diagonal band: left value first, right value only if that worked */
  if (!had_err && arguments->diagonal
      && gt_str_array_size(arguments->diagonalbonds) > 0)
  {
    had_err = gt_parse_score_value(__LINE__, &band_left,
                                   gt_str_array_get(arguments->diagonalbonds,
                                                    0),
                                   false, err);
    if (!had_err)
    {
      had_err = gt_parse_score_value(__LINE__, &band_right,
                                     gt_str_array_get(arguments->diagonalbonds,
                                                      1),
                                     false, err);
    }
  }

  if (!had_err && arguments->spacetime)
  {
    timer = gt_timer_new();
  }

  /* run the alignments: linear gap costs if given, affine otherwise */
  if (!had_err)
  {
    const bool affine = gt_str_array_size(arguments->linearcosts) == 0;

    if (affine)
    {
      gt_assert(gt_str_array_size(arguments->affinecosts) > 0);
    }
    had_err = gt_all_against_all_alignment_check(
                                       affine, align, arguments,
                                       spacemanager, scorehandler,
                                       gt_alphabet_characters(alphabet),
                                       gt_alphabet_wildcard_show(alphabet),
                                       first_table, second_table,
                                       band_left, band_right,
                                       timer, err);
  }

  /* report for the spacetime option */
  if (!had_err && arguments->spacetime)
  {
    printf("# combined space peak in kilobytes: %f\n",
           GT_KILOBYTES(gt_linspace_management_get_spacepeak(spacemanager)));
    gt_timer_show_formatted(timer, "# TIME overall " GT_WD ".%02ld\n",
                            stdout);
  }

  gt_timer_delete(timer);
  gt_linspace_management_delete(spacemanager);
  gt_sequence_table_delete(first_table);
  gt_sequence_table_delete(second_table);
  gt_alignment_delete(align);
  gt_alphabet_delete(alphabet);
  gt_scorehandler_delete(scorehandler);
  return had_err;
}
/* Consistency check for local alignment with fixed scores
 * (match 2, mismatch -2, gap -1).
 *
 * Compares, in this order, the score of
 * gt_linearalign_compute_local_generic(), the evaluation of its alignment
 * by gt_alignment_eval_with_score(), the score of
 * gt_squarealign_calculate_local() and the evaluation of that alignment.
 * On the first mismatch, or if a sequence contains LINEAR_EDIST_GAP, a
 * message is written to stderr and the program exits with
 * GT_EXIT_PROGRAMMING_ERROR.
 *
 * forward is unused. */
void gt_linearalign_check_local(GT_UNUSED bool forward,
                                const GtUchar *useq,
                                GtUword ulen,
                                const GtUchar *vseq,
                                GtUword vlen)
{
  const GtWord match = 2, mismatch = -2, gap = -1;
  GtWord linear_score, linear_eval, square_score, square_eval;
  GtScoreHandler *handler;
  GtLinspaceManagement *space;
  GtAlignment *alignment;

  /* neither input may contain the gap symbol */
  if (memchr(useq, LINEAR_EDIST_GAP, ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP, vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  handler = gt_scorehandler_new(match, mismatch, 0, gap);
  gt_scorehandler_plain(handler);
  space = gt_linspace_management_new();
  alignment = gt_alignment_new();

  /* linear-space result and its re-evaluation */
  linear_score = gt_linearalign_compute_local_generic(space, handler,
                                                      alignment,
                                                      useq, 0, ulen,
                                                      vseq, 0, vlen);
  linear_eval = gt_alignment_eval_with_score(alignment, true,
                                             match, mismatch, gap);
  gt_linspace_management_delete(space);
  gt_scorehandler_delete(handler);
  if (linear_score != linear_eval)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n",
            linear_score, linear_eval);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* square-space result, computed into the same (reset) alignment */
  gt_alignment_reset(alignment);
  square_score = gt_squarealign_calculate_local(NULL, alignment,
                                                useq, 0, ulen,
                                                vseq, 0, vlen,
                                                match, mismatch, gap);
  if (linear_score != square_score)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
            " = gt_squarealign_calculate_local\n",
            linear_score, square_score);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  square_eval = gt_alignment_eval_with_score(alignment, true,
                                             match, mismatch, gap);
  if (square_score != square_eval)
  {
    fprintf(stderr,"gt_squarealign_calculate_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n",
            square_score, square_eval);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_alignment_delete(alignment);
}
/* Consistency check for global alignment with unit costs
 * (match 0, mismatch 1, gap 1).
 *
 * Compares, in this order, the results of gt_calc_linearalign(),
 * gt_squarealign_global_distance_only(), gt_alignment_eval_with_score()
 * on the computed alignment, and gt_calc_linearedist(). On the first
 * mismatch, or if a sequence contains LINEAR_EDIST_GAP, a message is
 * written to stderr and the program exits with
 * GT_EXIT_PROGRAMMING_ERROR.
 *
 * forward is unused. */
void gt_linearalign_check(GT_UNUSED bool forward,
                          const GtUchar *useq,
                          GtUword ulen,
                          const GtUchar *vseq,
                          GtUword vlen)
{
  const bool use_downcase = true;
  GtUword match = 0, mismatch = 1, gap = 1;
  GtUword linear_dist, square_dist, eval_dist, edist;
  GtScoreHandler *handler;
  GtLinspaceManagement *space;
  GtAlignment *alignment;

  /* neither input may contain the gap symbol */
  if (memchr(useq, LINEAR_EDIST_GAP, ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP, vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  handler = gt_scorehandler_new(match, mismatch, 0, gap);
  gt_scorehandler_plain(handler);
  gt_scorehandler_downcase(handler);
  space = gt_linspace_management_new();
  alignment = gt_alignment_new_with_seqs(useq, ulen, vseq, vlen);

  linear_dist = gt_calc_linearalign(space, handler, alignment,
                                    useq, 0, ulen, vseq, 0, vlen);
  square_dist = gt_squarealign_global_distance_only(useq, 0, ulen,
                                                    vseq, 0, vlen,
                                                    handler);
  if (linear_dist != square_dist)
  {
    fprintf(stderr,"gt_calc_linearalign = "GT_WU" != "GT_WU
            " = gt_squarealign_global_distance_only\n",
            linear_dist, square_dist);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  eval_dist = gt_alignment_eval_with_score(alignment, true,
                                           match, mismatch, gap);
  if (square_dist != eval_dist)
  {
    fprintf(stderr,"gt_squarealign_global_distance_only = "GT_WU" != "GT_WU
            " = gt_alignment_eval_with_score\n",
            square_dist, eval_dist);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  edist = gt_calc_linearedist(use_downcase, useq, ulen, vseq, vlen);
  if (eval_dist != edist)
  {
    fprintf(stderr,"gt_alignment_eval_with_score = "GT_WU" != "GT_WU
            " = gt_calc_linearedist\n", eval_dist, edist);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_linspace_management_delete(space);
  gt_scorehandler_delete(handler);
  gt_alignment_delete(alignment);
}
/* determining start and end of local alignment and call global function */ GtWord gt_linearalign_compute_local_generic(GtLinspaceManagement *spacemanager, const GtScoreHandler *scorehandler, GtAlignment *align, const GtUchar *useq, GtUword ustart, GtUword ulen, const GtUchar *vseq, GtUword vstart, GtUword vlen) { GtWord *Ltabcolumn, score = GT_WORD_MAX; GtUwordPair *Starttabcolumn; GtUword ulen_part, ustart_part, vlen_part, vstart_part; GtMaxcoordvalue *max; gt_assert(spacemanager && scorehandler && align); gt_linspace_management_set_ulen(spacemanager,ulen); if (ulen == 0UL || vlen == 0UL) { /* empty alignment */ return 0; } else if (vlen == 1UL) { gt_linspace_management_check_local(spacemanager, (ulen+1)*(vlen+1)-1, ulen, sizeof (GtWord), sizeof (GtWord *)); return gt_squarealign_calculate_local_generic(spacemanager, align, useq, ustart, ulen, vseq, vstart, vlen, scorehandler); } else if (gt_linspace_management_checksquare_local(spacemanager, ulen, vlen, sizeof (*Ltabcolumn), sizeof (*Starttabcolumn))) { /* call 2dim */ return gt_squarealign_calculate_local_generic(spacemanager, align, useq, ustart, ulen, vseq, vstart, vlen, scorehandler); } gt_linspace_management_check_local(spacemanager, ulen, vlen, sizeof (*Ltabcolumn), sizeof (*Starttabcolumn)); max = evaluateallLScolumns(spacemanager, scorehandler, useq, ustart, ulen, vseq, vstart, vlen); if (gt_maxcoordvalue_get_length_safe(max)) { GtScoreHandler *costhandler; ustart_part = ustart+(gt_maxcoordvalue_get_start(max)).a; vstart_part = vstart+(gt_maxcoordvalue_get_start(max)).b; ulen_part = gt_maxcoordvalue_get_row_length(max); vlen_part = gt_maxcoordvalue_get_col_length(max); score = gt_maxcoordvalue_get_value(max); gt_alignment_set_seqs(align, useq + ustart_part, ulen_part, vseq + vstart_part, vlen_part); costhandler = gt_scorehandler2costhandler(scorehandler); /* call global function */ gt_calc_linearalign(spacemanager, costhandler, align, useq, ustart_part, ulen_part, vseq, vstart_part, vlen_part); 
gt_scorehandler_delete(costhandler); } else { /*empty alignment */ return 0; } return score; }
/* Runner of the tool that displays seed extension matches.
 *
 * A match iterator is created for arguments->matchfilename. Each match
 * it delivers is handled in one of three ways:
 *   - no seed line available: gt_show_seed_extend_plain();
 *   - seed line available, seed_extend not set:
 *     gt_show_seed_extend_encseq();
 *   - seed line available, seed_extend set:
 *     gt_greedy_extend_selfmatch_with_output(), which requires both
 *     encoded sequences to be the same object.
 *
 * The loop stops at the first error. All helper objects are released
 * before returning.
 * Returns 0 on success and a non-zero error code otherwise. */
static int gt_show_seedext_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  GtShowSeedextArguments *arguments = tool_arguments;
  GtSeedextendMatchIterator *semi;
  GtUword width;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);

  /* Parse option string in first line of file specified by filename. */
  width = arguments->show_alignment ? 70 : 0;
  semi = gt_seedextend_match_iterator_new(arguments->matchfilename,err);
  if (semi == NULL)
  {
    had_err = -1;
  }

  /* Parse seed extensions. */
  if (!had_err)
  {
    const GtEncseq *aencseq = gt_seedextend_match_iterator_aencseq(semi),
                   *bencseq = gt_seedextend_match_iterator_bencseq(semi);
    GtAlignment *alignment = gt_alignment_new();
    Polishing_info *pol_info = NULL;
    GtSequencepairbuffer seqpairbuf = {NULL,NULL,0,0};
    /* the following two are used if seed_extend is set */
    GtGreedyextendmatchinfo *greedy_info = NULL;
    GtProcessinfo_and_querymatchspaceptr extend_ctx;
    const GtUchar *characters = gt_encseq_alphabetcharacters(aencseq);
    const GtUchar wildcardshow = gt_encseq_alphabetwildcardshow(aencseq);
    GtUchar *show_buffer = arguments->show_alignment
                             ? gt_alignment_buffer_new(width)
                             : NULL;
    GtLinspaceManagement *linspace_manager = gt_linspaceManagement_new();
    GtScoreHandler *linspace_scores = gt_scorehandler_new(0,1,0,1);
    GtQuerymatch *match;

    if (!arguments->relax_polish)
    {
      double bias = GT_DEFAULT_MATCHSCORE_BIAS;

      if (gt_seedextend_match_iterator_bias_parameters(semi))
      {
        bias = gt_greedy_dna_sequence_bias_get(aencseq);
      }
      pol_info = polishing_info_new_with_bias(
                       gt_seedextend_match_iterator_errorpercentage(semi),
                       bias,
                       gt_seedextend_match_iterator_history_size(semi));
    }
    if (arguments->seed_display)
    {
      gt_seedextend_match_iterator_seed_display_set(semi);
    }
    if (arguments->show_alignment || arguments->showeoplist)
    {
      gt_seedextend_match_iterator_querymatchoutoptions_set(
                                             semi,
                                             true,
                                             arguments->showeoplist,
                                             width,
                                             !arguments->relax_polish,
                                             arguments->seed_display);
    }
    if (arguments->seed_extend)
    {
      greedy_info
        = gt_greedy_extend_matchinfo_new(
                       70,
                       GT_MAX_ALI_LEN_DIFF,
                       gt_seedextend_match_iterator_history_size(semi),
                       GT_MIN_PERC_MAT_HISTORY,
                       0, /* userdefinedleastlength */
                       GT_EXTEND_CHAR_ACCESS_ANY,
                       100,
                       pol_info);
    }
    if (pol_info != NULL)
    {
      gt_alignment_polished_ends(alignment,pol_info,false);
    }
    extend_ctx.processinfo = greedy_info;
    if (arguments->sortmatches)
    {
      (void) gt_seedextend_match_iterator_all_sorted(semi,true);
    }

    /* process the matches one by one until the iterator is exhausted */
    while ((match = gt_seedextend_match_iterator_next(semi)) != NULL)
    {
      if (!gt_seedextend_match_iterator_has_seedline(semi))
      {
        gt_show_seed_extend_plain(&seqpairbuf,
                                  linspace_manager,
                                  linspace_scores,
                                  alignment,
                                  show_buffer,
                                  width,
                                  arguments->showeoplist,
                                  characters,
                                  wildcardshow,
                                  aencseq,
                                  bencseq,
                                  match);
      }
      else if (!arguments->seed_extend)
      {
        const GtUword query_totallength
          = gt_encseq_seqlength(bencseq, gt_querymatch_queryseqnum(match));

        gt_show_seed_extend_encseq(match, aencseq, bencseq,
                                   query_totallength);
      }
      else if (aencseq == bencseq)
      {
        const GtUword
          seedlen = gt_seedextend_match_iterator_seedlen(semi),
          seedpos1 = gt_seedextend_match_iterator_seedpos1(semi),
          seedpos2 = gt_seedextend_match_iterator_seedpos2(semi);

        extend_ctx.querymatchspaceptr = match;
        had_err = gt_greedy_extend_selfmatch_with_output(&extend_ctx,
                                                         aencseq,
                                                         seedlen,
                                                         seedpos1,
                                                         seedpos2,
                                                         err);
        if (had_err)
        {
          break;
        }
      }
      else
      {
        /* extension is only handled when both sequences are identical */
        gt_assert(false);
      }
    }

    polishing_info_delete(pol_info);
    gt_greedy_extend_matchinfo_delete(greedy_info);
    gt_free(show_buffer);
    gt_scorehandler_delete(linspace_scores);
    gt_linspaceManagement_delete(linspace_manager);
    gt_free(seqpairbuf.a_sequence);
    gt_free(seqpairbuf.b_sequence);
    gt_alignment_delete(alignment);
  }
  gt_seedextend_match_iterator_delete(semi);
  return had_err;
}