/*
  Reset the traceback state embedded in <lci> for a new traceback.

  <dbprefixlen> becomes both the stored database prefix length and the
  current database position; <pprefixlen> becomes both the query end and
  the current query position. The database substring buffer is grown so
  that it can hold <dbprefixlen> symbols, and the alignment is reset.
*/
void gt_reinitLocaliTracebackstate(Limdfsconstinfo *lci,
                                   GtUword dbprefixlen,
                                   GtUword pprefixlen)
{
  LocaliTracebackstate *tbs = &lci->tbs;

  tbs->dbprefixlen = tbs->dbcurrent = dbprefixlen;
  tbs->queryend = tbs->querypos = pprefixlen;
  if (dbprefixlen > (GtUword) tbs->allocatedGtUchardbsubstring)
  {
    tbs->spaceGtUchardbsubstring
      = gt_realloc(tbs->spaceGtUchardbsubstring,
                   sizeof (GtUchar) * dbprefixlen);
    /* record the new capacity; otherwise the check above compares against
       a stale value and the buffer is reallocated (and possibly shrunk)
       on every call */
    tbs->allocatedGtUchardbsubstring = dbprefixlen;
  }
  gt_alignment_reset(tbs->alignment);
}
/*
  Self-check of the linear space local alignment computation for the
  sequences <useq> (length <ulen>) and <vseq> (length <vlen>), using the
  fixed scores match = 2, mismatch = -2, gap = -1.

  The following values are compared and the program exits with
  GT_EXIT_PROGRAMMING_ERROR on the first mismatch:
    - score returned by gt_computelinearspace_local_generic vs. the score
      obtained by evaluating the alignment it produced,
    - that score vs. the score of alignment_in_square_space_local,
    - the square space score vs. the evaluation of its alignment.

  The program also exits if one of the sequences contains
  LINEAR_EDIST_GAP. If check_dna_sequence() returns NULL for one of the
  sequences, the function returns without performing any comparison.
  <forward> is unused.
*/
void gt_checklinearspace_local(GT_UNUSED bool forward,
                               const GtUchar *useq, GtUword ulen,
                               const GtUchar *vseq, GtUword vlen)
{
  GtAlignment *align;
  GtWord score1, score2, score3, score4,
         matchscore = 2, mismatchscore = -2, gapscore = -1;
  GtUchar *low_useq, *low_vseq;
  LinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;
  GtAlphabet *alphabet;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  scorehandler = gt_scorehandler_new_DNA(matchscore, mismatchscore,
                                         0, gapscore);
  alphabet = gt_scorehandler_get_alphabet(scorehandler);
  low_useq = check_dna_sequence(useq, ulen, alphabet);
  low_vseq = check_dna_sequence(vseq, vlen, alphabet);

  if (low_useq == NULL || low_vseq == NULL)
  {
    /* release whichever copy was created; plain if-statements are used
       because a conditional expression must not mix a void operand with
       an integer operand in ISO C */
    if (low_useq != NULL)
    {
      gt_free(low_useq);
    }
    if (low_vseq != NULL)
    {
      gt_free(low_vseq);
    }
    gt_scorehandler_delete(scorehandler);
    return;
  }

  spacemanager = gt_linspaceManagement_new();
  align = gt_alignment_new();
  score1 = gt_computelinearspace_local_generic(spacemanager, scorehandler,
                                               align, useq, 0, ulen,
                                               vseq, 0, vlen);
  score2 = gt_alignment_eval_with_score(align, matchscore,
                                        mismatchscore, gapscore);
  /* the linear space resources are not needed for the remaining checks */
  gt_linspaceManagement_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  if (score1 != score2)
  {
    fprintf(stderr,"gt_computelinearspace_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score1, score2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* reuse the alignment object for the square space reference */
  gt_alignment_reset(align);
  score3 = alignment_in_square_space_local(NULL, align, useq, 0, ulen,
                                           vseq, 0, vlen, matchscore,
                                           mismatchscore, gapscore);
  if (score1 != score3)
  {
    fprintf(stderr,"gt_computelinearspace_local = "GT_WD" != "GT_WD
            " = alignment_in_square_space_local\n", score1, score3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  score4 = gt_alignment_eval_with_score(align, matchscore,
                                        mismatchscore, gapscore);
  if (score3 != score4)
  {
    fprintf(stderr,"alignment_in_square_space_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score3, score4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_alignment_delete(align);
  gt_free(low_useq);
  gt_free(low_vseq);
}
/*call function with linear gap costs for all given sequences */ static int gt_all_against_all_alignment_check(bool affine, GtAlignment *align, const GtLinspaceArguments *arguments, GtLinspaceManagement *spacemanager, const GtScoreHandler *scorehandler, const GtUchar *characters, GtUchar wildcardshow, const GtSequenceTable *sequence_table1, const GtSequenceTable *sequence_table2, GtWord left_dist, GtWord right_dist, GtTimer *linspacetimer, GtError *err) { int had_err = 0; const GtUchar *useq, *vseq; GtUword i, j, ulen, vlen; gt_error_check(err); if (linspacetimer != NULL) { gt_timer_start(linspacetimer); } for (i = 0; !had_err && i < sequence_table1->size; i++) { ulen = gt_str_length(sequence_table1->seqarray[i]); useq = (const GtUchar*) gt_str_get(sequence_table1->seqarray[i]); for (j = 0; j< sequence_table2->size; j++) { vlen = gt_str_length(sequence_table2->seqarray[j]); vseq = (const GtUchar*) gt_str_get(sequence_table2->seqarray[j]); gt_alignment_reset(align); if (arguments->global) { if (arguments->diagonal) { if (gt_str_array_size(arguments->diagonalbonds) == 0) { left_dist = LEFT_DIAGONAL_SHIFT(arguments->similarity, ulen, vlen); right_dist = RIGHT_DIAGONAL_SHIFT(arguments->similarity, ulen, vlen); } if ((left_dist > MIN(0, (GtWord)vlen-(GtWord)ulen))|| (right_dist < MAX(0, (GtWord)vlen-(GtWord)ulen))) { gt_error_set(err, "ERROR: invalid diagonalband for global " "alignment (ulen: "GT_WU", vlen: "GT_WU")\n" "left_dist <= MIN(0, vlen-ulen) and " "right_dist >= MAX(0, vlen-ulen)", ulen, vlen); had_err = 1; } if (!had_err) { (affine ? gt_diagonalbandalign_affinegapcost_compute_generic : gt_diagonalbandalign_compute_generic) (spacemanager, scorehandler, align, useq, 0, ulen, vseq, 0, vlen, left_dist, right_dist); } } else { (affine ? gt_linearalign_affinegapcost_compute_generic : gt_linearalign_compute_generic) (spacemanager, scorehandler, align, useq, 0, ulen, vseq, 0, vlen); } } else if (arguments->local) { (affine ? 
gt_linearalign_affinegapcost_compute_local_generic : gt_linearalign_compute_local_generic) (spacemanager, scorehandler, align, useq, 0, ulen, vseq, 0, vlen); } /* show alignment*/ if (!had_err) { gt_assert(align != NULL); if (!strcmp(gt_str_get(arguments->outputfile),"stdout")) { alignment_show_with_sequences(useq, ulen, vseq, vlen, align, characters, wildcardshow, arguments->showscore, !arguments->scoreonly, arguments->showsequences, arguments->global, scorehandler, stdout); } else { FILE *fp = gt_fa_fopen_func(gt_str_get(arguments->outputfile), "a", __FILE__,__LINE__,err); if (fp == NULL) { had_err = -1; } else { alignment_show_with_sequences(useq, ulen, vseq, vlen, align, characters, wildcardshow, arguments->showscore, !arguments->scoreonly, arguments->showsequences, arguments->global, scorehandler,fp); gt_fa_fclose(fp); } } } } } if (linspacetimer != NULL) { gt_timer_stop(linspacetimer); } if (!had_err && arguments->wildcardshow) { printf("# wildcards are represented by %c\n", wildcardshow); } return had_err; }
/*
  Cross-check the linear space local alignment of <useq> (length <ulen>)
  and <vseq> (length <vlen>) against the square space implementation,
  using match = 2, mismatch = -2 and gap = -1.

  Each computed score is additionally compared with the score obtained by
  evaluating the alignment that was produced. Any disagreement, as well as
  the occurrence of LINEAR_EDIST_GAP in one of the input sequences, is
  reported on stderr and terminates the program with
  GT_EXIT_PROGRAMMING_ERROR. <forward> is unused.
*/
void gt_linearalign_check_local(GT_UNUSED bool forward,
                                const GtUchar *useq, GtUword ulen,
                                const GtUchar *vseq, GtUword vlen)
{
  GtWord matchscore = 2;
  GtWord mismatchscore = -2;
  GtWord gapscore = -1;
  GtWord linear_score;
  GtWord linear_eval;
  GtWord square_score;
  GtWord square_eval;
  GtScoreHandler *scorehandler;
  GtLinspaceManagement *spacemanager;
  GtAlignment *align;

  /* neither input may contain the gap symbol */
  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* step 1: linear space computation and evaluation of its alignment */
  scorehandler = gt_scorehandler_new(matchscore, mismatchscore, 0, gapscore);
  gt_scorehandler_plain(scorehandler);
  spacemanager = gt_linspace_management_new();
  align = gt_alignment_new();
  linear_score = gt_linearalign_compute_local_generic(spacemanager,
                                                      scorehandler, align,
                                                      useq, 0, ulen,
                                                      vseq, 0, vlen);
  linear_eval = gt_alignment_eval_with_score(align, true, matchscore,
                                             mismatchscore, gapscore);
  gt_linspace_management_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  if (linear_score != linear_eval)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n",
            linear_score, linear_eval);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* step 2: square space computation into the same alignment object */
  gt_alignment_reset(align);
  square_score = gt_squarealign_calculate_local(NULL, align,
                                                useq, 0, ulen,
                                                vseq, 0, vlen,
                                                matchscore, mismatchscore,
                                                gapscore);
  if (linear_score != square_score)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
            " = gt_squarealign_calculate_local\n",
            linear_score, square_score);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* step 3: evaluation of the square space alignment */
  square_eval = gt_alignment_eval_with_score(align, true, matchscore,
                                             mismatchscore, gapscore);
  if (square_score != square_eval)
  {
    fprintf(stderr,"gt_squarealign_calculate_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n",
            square_score, square_eval);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  gt_alignment_delete(align);
}
static void applysmithwaterman(SWdpresource *dpresource, const GtEncseq *encseq, GtUword encsequnit, GtUword startpos, GtUword endpos, const GtUchar *query, GtUword querylen) { Scoretype score; Maxscorecoord maxpair; DPregion maxentry; if (dpresource->allocatedswcol < querylen + 1) { dpresource->allocatedswcol = querylen + 1; dpresource->swcol = gt_realloc(dpresource->swcol, sizeof *dpresource->swcol * dpresource->allocatedswcol); dpresource->swentrycol = gt_realloc(dpresource->swentrycol, sizeof *dpresource->swentrycol * dpresource->allocatedswcol); } score = swlocalsimilarityscore(dpresource->swcol,&maxpair, &dpresource->scorevalues, query,querylen,encseq,startpos,endpos); if (score >= (Scoretype) dpresource->scorethreshold) { GtIdxMatch match; swlocalsimilarityregion(dpresource->swentrycol, &maxentry, &dpresource->scorevalues, query,maxpair.umax, encseq,startpos,startpos + maxpair.vmax); gt_assert(maxentry.similarity == score); match.dbabsolute = false; match.dbstartpos = maxentry.start2; match.dblen = maxentry.len2; match.dbseqnum = encsequnit; match.querystartpos = maxentry.start1; match.querylen = maxentry.len1; gt_assert(maxentry.similarity >= 0); match.distance = (GtUword) maxentry.similarity; if (dpresource->showalignment) { if (dpresource->allocatedmaxedges < (maxentry.len1 + 1) * (maxentry.len2 + 1)) { dpresource->allocatedmaxedges = (maxentry.len1 + 1) * (maxentry.len2 + 1); dpresource->maxedges = gt_realloc(dpresource->maxedges, sizeof *dpresource->maxedges * dpresource->allocatedmaxedges); } gt_alignment_reset(dpresource->alignment); if (dpresource->allocateddbsubstring < (GtUword) maxentry.len2) { dpresource->allocateddbsubstring = (GtUword) maxentry.len2; dpresource->dbsubstring = gt_realloc(dpresource->dbsubstring, sizeof *dpresource->dbsubstring * dpresource->allocateddbsubstring); } swproducealignment(dpresource->alignment, dpresource->dbsubstring, dpresource->maxedges, dpresource->swcol, &dpresource->scorevalues, dpresource->scorethreshold, 
query + maxentry.start1, maxentry.len1, encseq, startpos + maxentry.start2, startpos + maxentry.start2 + maxentry.len2); match.alignment = dpresource->alignment; match.dbsubstring = dpresource->dbsubstring; } else { match.dbsubstring = NULL; match.alignment = NULL; } dpresource->processmatch(dpresource->processmatchinfo,&match); } }
static void gt_show_seed_extend_plain(GtSequencepairbuffer *seqpairbuf, GtLinspaceManagement *linspace_spacemanager, GtScoreHandler *linspace_scorehandler, GtAlignment *alignment, GtUchar *alignment_show_buffer, GtUword alignmentwidth, bool showeoplist, const GtUchar *characters, GtUchar wildcardshow, const GtEncseq *aencseq, const GtEncseq *bencseq, const GtQuerymatch *querymatchptr) { GtUword edist; GtReadmode query_readmode = gt_querymatch_query_readmode(querymatchptr); const GtUword distance = gt_querymatch_distance(querymatchptr), dblen = gt_querymatch_dblen(querymatchptr), queryseqnum = gt_querymatch_queryseqnum(querymatchptr), querystart_fwdstrand = gt_querymatch_querystart_fwdstrand(querymatchptr), querylen = gt_querymatch_querylen(querymatchptr); const GtUword apos_ab = gt_querymatch_dbstart(querymatchptr); const GtUword bpos_ab = gt_encseq_seqstartpos(bencseq, queryseqnum) + querystart_fwdstrand; gt_querymatch_coordinates_out(querymatchptr); if (dblen >= seqpairbuf->a_allocated) { seqpairbuf->a_sequence = gt_realloc(seqpairbuf->a_sequence, sizeof *seqpairbuf->a_sequence * dblen); seqpairbuf->a_allocated = dblen; } if (querylen >= seqpairbuf->b_allocated) { seqpairbuf->b_sequence = gt_realloc(seqpairbuf->b_sequence, sizeof *seqpairbuf->b_sequence * querylen); seqpairbuf->b_allocated = querylen; } gt_encseq_extract_encoded(aencseq, seqpairbuf->a_sequence, apos_ab, apos_ab + dblen - 1); gt_encseq_extract_encoded(bencseq, seqpairbuf->b_sequence, bpos_ab, bpos_ab + querylen - 1); if (query_readmode != GT_READMODE_FORWARD) { gt_assert(query_readmode == GT_READMODE_REVCOMPL); gt_inplace_reverse_complement(seqpairbuf->b_sequence,querylen); } edist = gt_computelinearspace_generic(linspace_spacemanager, linspace_scorehandler, alignment, seqpairbuf->a_sequence, 0, dblen, seqpairbuf->b_sequence, 0, querylen); if (edist < distance) { printf("# edist=" GT_WU " (smaller by " GT_WU ")\n",edist,distance - edist); } gt_assert(edist <= distance); if (alignmentwidth > 0) { 
gt_alignment_show_generic(alignment_show_buffer, false, alignment, stdout, alignmentwidth, characters, wildcardshow); } if (showeoplist && distance > 0) { gt_alignment_show_multieop_list(alignment, stdout); } if (alignmentwidth > 0 || showeoplist) { gt_alignment_reset(alignment); } }