//  Align sequences a and b with alignLinker() and package the result as an ALNoverlap.
//
//  Parameters:
//    a, b          Sequences to align.  When 'opposite' is non-zero, b is reverse-complemented
//                  in place for the duration of the alignment and restored before returning.
//    begUNUSED, endUNUSED, thresh, minlen, what
//                  Not referenced by this function.
//    ahang, bhang  Passed through to alignLinker().
//    opposite      Non-zero to align a against the reverse complement of b; stored in the
//                  returned overlap's 'comp' field.
//    erate         Largest diffs/length ratio that is still reported as an overlap.
//
//  Returns a pointer to the overlap, or NULL when alignLinker() reports a zero-length alignment,
//  when neither sequence is aligned from its first base, or when diffs/length exceeds erate.
//
//  The returned overlap and its trace live in a function-static work area that is cleared and
//  reused by every call, so a result is only valid until the next call.
//
ALNoverlap * Optimal_Overlap_AS_forCNS(char *a, char *b, int begUNUSED, int endUNUSED, int ahang, int bhang, int opposite, double erate, double thresh, int minlen, CompareOptions what) {
  static Optimal_Overlap_Data  *ood = NULL;

  if (ood == NULL)
    ood = new Optimal_Overlap_Data;

  memset(ood->h_alignA, 0, sizeof(char) * (AS_MAX_READLEN + AS_MAX_READLEN + 2));
  memset(ood->h_alignB, 0, sizeof(char) * (AS_MAX_READLEN + AS_MAX_READLEN + 2));
  memset(ood->h_trace,  0, sizeof(int)  * (AS_MAX_READLEN + AS_MAX_READLEN + 2));

  alignLinker_s  al;

  //if (VERBOSE_MULTIALIGN_OUTPUT >= 3)
  //  fprintf(stderr, "Optimal_Overlap_AS_forCNS()-- Begins\n");

#if 0
  if (erate > AS_MAX_ERROR_RATE) {
    //fprintf(stderr, "Optimal_Overlap_AS_forCNS()-- erate=%f >= AS_MAX_ERROR_RATE=%f, reset to max\n", erate, (double)AS_MAX_ERROR_RATE);
    erate = AS_MAX_ERROR_RATE;
  }
  assert((0.0 <= erate) && (erate <= AS_MAX_ERROR_RATE));
#endif

  //  The aligner only sees forward sequences, so flip b for the duration of the alignment.
  if (opposite)
    reverseComplementSequence(b, strlen(b));

  //if (VERBOSE_MULTIALIGN_OUTPUT >= 3) {
  //  fprintf(stderr, "ALIGN %s\n", a);
  //  fprintf(stderr, "ALIGN %s\n", b);
  //}

  alignLinker(ood->h_alignA,
              ood->h_alignB,
              a,
              b,
              &al,
              true,   //  Looking for global end-to-end alignments
              false,  //  Count matches to N as matches
              ahang, bhang);

  if (al.alignLen == 0) {
    //  Undo the in-place reverse complement before bailing out, exactly as every other exit
    //  path does; otherwise the caller's b would be left flipped.
    if (opposite)
      reverseComplementSequence(b, strlen(b));

    return(NULL);
  }

  //if (VERBOSE_MULTIALIGN_OUTPUT >= 3) {
  //  fprintf(stderr, "ALIGN %d %d-%d %d-%d opposite=%d\n", al.alignLen, al.begI, al.endI, al.begJ, al.endJ, opposite);
  //  fprintf(stderr, "ALIGN '%s'\n", ood->h_alignA);
  //  fprintf(stderr, "ALIGN '%s'\n", ood->h_alignB);
  //}

  if (opposite) {
    //  Restore b, flip both aligned strings, and mirror the B coordinates within B.
    reverseComplementSequence(b, strlen(b));

    reverseComplementSequence(ood->h_alignA, al.alignLen);
    reverseComplementSequence(ood->h_alignB, al.alignLen);

    int x = al.begJ;
    al.begJ = al.lenB - al.endJ;
    al.endJ = al.lenB - x;
  }

  //  We don't expect partial overlaps here.  At least one fragment
  //  must have an alignment to the very start.
  //
  //  ECR depends on this return value; it is allowed to fail
  //  when building a new unitig multialign.  For example:
  //
  //  <-----------------------
  //                          ------>
  //
  //  When ECR tries to extend the second fragment, it checks that
  //  the extended fragment overlaps the next contig.  It does not
  //  check that the extended bits agree with the first fragment,
  //  leaving that up to "does the unitig rebuild".
  //
  if ((al.begJ != 0) && (al.begI != 0))
    return(NULL);

  ood->o.begpos  = (al.begI           > 0) ? (al.begI)           : -(al.begJ);
  ood->o.endpos  = (al.lenB - al.endJ > 0) ? (al.lenB - al.endJ) : -(al.lenA - al.endI);
  ood->o.length  = al.alignLen;
  ood->o.diffs   = 0;
  ood->o.comp    = opposite;
  ood->o.trace   = ood->h_trace;

  {
    int x  = 0;
    int tp = 0;
    int ap = al.begI;
    int bp = al.begJ;

    for (x=0; x<al.alignLen; x++) {
      //  <ctype.h> functions need a value representable as unsigned char.
      const unsigned char ca = (unsigned char)ood->h_alignA[x];
      const unsigned char cb = (unsigned char)ood->h_alignB[x];

      //  A gap in A adds a negative trace entry, a gap in B a positive one.  The decrement
      //  cancels the unconditional increment at the bottom of the loop, so a gap column does
      //  not advance the position in the gapped sequence.
      if (ca == '-') {
        ood->h_trace[tp++] = -(ap + 1);
        ap--;
      }
      if (cb == '-') {
        ood->h_trace[tp++] = bp + 1;
        bp--;
      }

      //  Count the differences.
      //
      //  But allow N's and lowercase as matches.  If either letter is N, then the other letter is
      //  NOT N (if both letters were N, both would be lowercase n, representing a match).  A
      //  column is also ignored when A is lowercase and B is a gap.
      //
      const bool diff   = (toupper(ca) != toupper(cb));
      const bool ignore = (ca == 'N') || (ca == 'n') ||
                          (cb == 'N') || (cb == 'n') ||
                          (islower(ca) && (cb == '-'));

      if (diff && !ignore)
        ood->o.diffs++;

      bp++;
      ap++;
    }

    //  The trace is zero-terminated.
    ood->h_trace[tp] = 0;

    //if (VERBOSE_MULTIALIGN_OUTPUT >= 4) {
    //  fprintf(stderr, "trace");
    //  for (x=0; x<tp; x++)
    //    fprintf(stderr, " %d", ood->h_trace[x]);
    //  fprintf(stderr, "\n");
    //  fprintf(stderr, "A: %4d-%4d %4d %s\n", al.begI, al.endI, al.lenA, ood->h_alignA);
    //  fprintf(stderr, "B: %4d-%4d %4d %s\n", al.begJ, al.endJ, al.lenB, ood->h_alignB);
    //}
  }

  //if (VERBOSE_MULTIALIGN_OUTPUT >= 3) {
  //  fprintf(stderr, "ERATE:   diffs=%d / length=%d = %f\n", ood->o.diffs, ood->o.length, (double)ood->o.diffs / ood->o.length);
  //  fprintf(stderr, "Optimal_Overlap_AS_forCNS()-- Ends\n");
  //}

  //  length is al.alignLen, which is known to be non-zero here.
  if ((double)ood->o.diffs / ood->o.length <= erate)
    return(&ood->o);

  return(NULL);
}
int foundLinker(char *seq,char *link1,char *link2,int *st,int *en,float *pId, int allowTail) { int score,alnlen,id,start,end,coverage; int score1,alnlen1,id1,start1,end1,coverage1; int score2,alnlen2,id2,start2,end2,coverage2; int found,found1,found2; float percentId,percentId1,percentId2; char *link; found1 = found2 = found = 0; score1 = alignLinker(seq,link1,&alnlen1,&id1,&start1,&end1); score2 = alignLinker(seq,link2,&alnlen2,&id2,&start2,&end2); percentId1 = ((float) id1) / ((float) alnlen1); percentId2 = ((float) id2) / ((float) alnlen2); coverage1 = end1-start1+1; coverage2 = end2-start2+1; if (percentId1 >= identity && (coverage1 >= matchLen || (allowTail && strlen(seq)-end1 <= 2 && coverage1 >= minLengthToCallLinker))) { found1 = 1; } if (percentId2 >= identity && (coverage2 >= matchLen || (allowTail && strlen(seq)-end2 <= 2 && coverage2 >= minLengthToCallLinker))) { found2 = 1; } if (verbose > 1) { if (found1) { fprintf(stderr,"found1: %s score=%d id=%d alnlen=%d\n",seq,score1,id1,alnlen1); fprintf(stderr," %*s%s\n",start1,"",link1); } if (found2) { fprintf(stderr,"found2: %s score=%d id=%d alnlen=%d\n",seq,score2,id2,alnlen2); fprintf(stderr," %*s%s\n",start2,"",link2); } } if (found1 && (!found2 || score1 > score2)) { found = 1; score = score1; alnlen = alnlen1; id = id1; start = start1; end = end1; coverage = coverage1; percentId = percentId1; link = link1; } else if ((!found1 || score2 >= score1) && found2) { found = 1; score = score2; alnlen = alnlen2; id = id2; start = start2; end = end2; coverage = coverage2; percentId = percentId2; link = link2; } else if (!found1 && !found2) { found = 0; } else { fprintf(stderr,"ERROR: ambiguous case in foundLinker\n"); fprintf(stderr," seq: %s\n",seq); fprintf(stderr," lnk1: %s score1=%d id1=%d start1=%d end1=%d alnlen1=%d\n", link1,score1,id1,start1,end1,alnlen1); fprintf(stderr," lnk2: %s score2=%d id2=%d start2=%d end2=%d alnlen2=%d\n", link2,score2,id2,start2,end2,alnlen2); found = 0; } if (found) { *pId = 
percentId; *st = start; *en = end; if (verbose) { fprintf(stderr,"final: %s score=%d id=%d alnlen=%d\n",seq,score,id,alnlen); fprintf(stderr," %*s%s\n",start,"",link); } } else { if (verbose) fprintf(stderr,"no match %s\n",seq); } return found; }