struct ffAli *smallMiddleExons(struct ffAli *aliList,
        struct ssBundle *bundle,
        enum ffStringency stringency)
/* Look for small exons in the middle.  For every pair of neighboring
 * blocks that are at least three bases apart on both the needle (n) and
 * the haystack (h) side, ffFind is run on the gap and any blocks it
 * returns are passed through forceMonotonic and then spliced in between
 * the two neighbors.
 *
 * Returns the head of the list, which is always the aliList passed in
 * (new blocks are only ever inserted after an existing block).  Bundles
 * with a t3List and empty lists are returned untouched. */
{
    struct dnaSeq *qSeq;
    struct dnaSeq *genoSeq;
    struct ffAli *left, *right;

    if (bundle->t3List != NULL)
        return aliList;     /* Can't handle intense translated stuff. */
    if (aliList == NULL)
        return NULL;        /* Empty list: no gaps to search. */

    qSeq = bundle->qSeq;
    genoSeq = bundle->genoSeq;

    left = aliList;
    for (right = aliList->right; right != NULL; right = right->right)
        {
        if (right->hStart - left->hEnd >= 3 && right->nStart - left->nEnd >= 3)
            {
            struct ffAli *newLeft = ffFind(left->nEnd, right->nStart,
                                           left->hEnd, right->hStart, stringency);
            if (newLeft != NULL)
                {
                struct ffAli *newRight;

                newLeft = forceMonotonic(newLeft, qSeq, genoSeq, stringency,
                                         bundle->isProt, bundle->t3List);
                newRight = ffRightmost(newLeft);

                /* Both neighbors are known to be non-NULL here: left starts
                 * at the (non-NULL) head and afterwards only ever takes the
                 * value of right, which the loop condition guarantees. */
                left->right = newLeft;
                newLeft->left = left;
                right->left = newRight;
                newRight->right = right;
                }
            }
        /* Advance past any freshly inserted blocks; right->right was not
         * altered by the splice. */
        left = right;
        }
    return aliList;
}
void showBundle(struct ssBundle *bun, boolean isRc)
/* Display a bundle for user.  Prints one line to stdout per item on
 * bun->ffList giving the haystack (genoSeq) range, the needle (qSeq)
 * range, the strand ('-' when isRc is set, '+' otherwise) and the
 * ffScore of the alignment computed with ffTight. */
{
    struct ssFfItem *ffi;

    for (ffi = bun->ffList; ffi != NULL; ffi = ffi->next)
        {
        struct ffAli *left, *right;
        int score;
        DNA *needle = bun->qSeq->dna;
        DNA *hay = bun->genoSeq->dna;

        left = ffi->ff;
        right = ffRightmost(left);
        score = ffScore(left, ffTight);

        /* The coordinates are pointer differences (ptrdiff_t), which must
         * not be handed to %d; convert explicitly and print with %ld. */
        printf("%s:%ld-%ld of %d %s:%ld-%ld of %d strand %c score %d\n",
               bun->genoSeq->name,
               (long)(left->hStart - hay), (long)(right->hEnd - hay),
               bun->genoSeq->size,
               bun->qSeq->name,
               (long)(left->nStart - needle), (long)(right->nEnd - needle),
               bun->qSeq->size,
               (isRc ? '-' : '+'), score);
        }
}
static boolean smoothOneGap(struct ffAli *left, struct ffAli *right, struct ffAli *ffList) /* If and necessary connect left and right - either directly or * with a small intermediate ffAli inbetween. Do not bother to * merge directly abutting regions, this happens later. Returns * TRUE if any smoothing done. */ { int nGap = right->nStart - left->nEnd; int hGap = right->hStart - left->hEnd; if (nGap > 0 && hGap > 0 && nGap < 10 && hGap < 10) { int sizeDiff = nGap - hGap; if (sizeDiff < 0) sizeDiff = -sizeDiff; if (sizeDiff <= 3) { struct axtScoreScheme *ss = axtScoreSchemeRnaDefault(); char hSym[20], nSym[20]; int symCount; if (bandExt(TRUE, ss, 3, left->nEnd, nGap, left->hEnd, hGap, 1, sizeof(hSym), &symCount, nSym, hSym, NULL, NULL)) { int gapPenalty = -ffCalcCdnaGapPenalty(hGap, nGap) * ss->matrix['a']['a']; int score = axtScoreSym(ss, symCount, nSym, hSym); if (score >= gapPenalty) { struct ffAli *l, *r; l = ffAliFromSym(symCount, nSym, hSym, NULL, left->nEnd, left->hEnd); r = ffRightmost(l); left->right = l; l->left = left; r->right = right; right->left = r; return TRUE; } } } } return FALSE; }
static struct ffAli *trimFlakyEnds(struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct ffAli *ffList)
/* Get rid of small initial and terminal blocks that seem to be chance
 * alignments.  Working inward from each end, a terminal block is freed
 * while the penalty trimGapPenalty gives the gap separating it from its
 * neighbor is at least the block's ffScoreMatch score less its aPenalty.
 * Trimming at an end stops at the first block that survives, and the
 * last remaining block is never removed.  Returns the (possibly new)
 * head of the list.  qSeq and tSeq are not used here. */
{
    int orientation = ffIntronOrientation(ffList);
    struct ffAli *outer, *inner;
    char *gapStart, *gapEnd;
    int blockScore, gapPenalty;

    /* If one or less block then don't bother. */
    if (ffAliCount(ffList) < 2)
        return ffList;

    /* Trim beginnings: outer is the current head, inner its right neighbor. */
    for (outer = ffList, inner = ffList->right;
         inner != NULL;
         outer = inner, inner = inner->right)
        {
        blockScore = ffScoreMatch(outer->nStart, outer->hStart, outer->nEnd - outer->nStart);
        blockScore -= aPenalty(outer->nStart, outer->nEnd - outer->nStart);
        gapStart = outer->hEnd;
        gapEnd = inner->hStart;
        gapPenalty = trimGapPenalty(gapEnd - gapStart, inner->nStart - outer->nEnd,
                                    gapStart, gapEnd, orientation);
        if (gapPenalty < blockScore)
            break;
        freeMem(outer);
        ffList = inner;
        inner->left = NULL;
        }

    /* Trim ends: outer is the current tail, inner its left neighbor. */
    outer = ffRightmost(ffList);
    if (outer == ffList)
        return ffList;
    for (inner = outer->left;
         inner != NULL;
         outer = inner, inner = inner->left)
        {
        blockScore = ffScoreMatch(outer->nStart, outer->hStart, outer->nEnd - outer->nStart);
        blockScore -= aPenalty(outer->nStart, outer->nEnd - outer->nStart);
        gapStart = inner->hEnd;
        gapEnd = outer->hStart;
        gapPenalty = trimGapPenalty(gapEnd - gapStart, outer->nStart - inner->nEnd,
                                    gapStart, gapEnd, orientation);
        if (gapPenalty < blockScore)
            break;
        freeMem(outer);
        inner->right = NULL;
        }
    return ffList;
}
void oneAli(struct ffAli *left, struct dnaSeq *otherSeq,
        struct repeatTracker *rt, boolean isRc,
        enum ffStringency stringency, FILE *out)
/* Analyse one alignment and if it looks good enough write it out to file.
 *
 * left      - leftmost block of the alignment; blocks are walked via ->right.
 * otherSeq  - the needle sequence (n coordinates are offsets into its dna).
 * rt        - supplies the haystack sequence (rt->seq) and rt->repBytes; a
 *             matching base whose repBytes entry is non-zero is counted as
 *             a repeat match instead of a plain match.
 * isRc      - if set, needle coordinates are flipped against otherSeq->size
 *             and the strand is written as '-'.
 * stringency- only compared against ffCdna when calling calcMilliBad.
 * out       - destination; one tab-separated line is written per accepted
 *             alignment, and errAbort is called if the stream reports an
 *             error afterwards.
 *
 * Acceptance depends on calcMilliBad and on the file-level settings
 * veryTight, minBases and maxBad, which are declared outside this block. */
{
    struct dnaSeq *genoSeq = rt->seq;
    UBYTE *repBytes = rt->repBytes;
    struct ffAli *ff, *nextFf;
    struct ffAli *right = ffRightmost(left);
    DNA *needle = otherSeq->dna;
    DNA *hay = genoSeq->dna;
    int nStart = left->nStart - needle;
    int nEnd = right->nEnd - needle;
    int hStart = left->hStart - hay;
    int hEnd = right->hEnd - hay;
    int nInsertBaseCount = 0;
    int nInsertCount = 0;
    int hInsertBaseCount = 0;
    int hInsertCount = 0;
    int matchCount = 0;
    int mismatchCount = 0;
    int repMatch = 0;
    int countNs = 0;
    DNA *np, *hp, n, h;
    int blockSize;
    int i;
    int milliBad;
    int passIt;

    /* Count up matches, mismatches, inserts, etc. */
    for (ff = left; ff != NULL; ff = nextFf)
        {
        int blockHayOffset;     /* Offset of this block within the haystack. */

        nextFf = ff->right;
        blockSize = ff->nEnd - ff->nStart;
        np = ff->nStart;
        hp = ff->hStart;
        blockHayOffset = hp - hay;
        for (i = 0; i < blockSize; ++i)
            {
            n = np[i];
            h = hp[i];
            if (n == 'n' || h == 'n')
                ++countNs;
            else if (n == h)
                {
                if (repBytes[i + blockHayOffset])
                    ++repMatch;
                else
                    ++matchCount;
                }
            else
                ++mismatchCount;
            }
        if (nextFf != NULL)
            {
            if (ff->nEnd != nextFf->nStart)
                {
                ++nInsertCount;
                nInsertBaseCount += nextFf->nStart - ff->nEnd;
                }
            if (ff->hEnd != nextFf->hStart)
                {
                ++hInsertCount;
                hInsertBaseCount += nextFf->hStart - ff->hEnd;
                }
            }
        }

    /* See if it looks good enough to output. */
    milliBad = calcMilliBad(nEnd - nStart, hEnd - hStart, nInsertCount, hInsertCount,
                            matchCount, repMatch, mismatchCount, stringency == ffCdna);
    if (veryTight)
        {
        passIt = (milliBad < 60 &&
                  (matchCount >= 25 ||
                   (matchCount >= 15 && matchCount + repMatch >= 50) ||
                   (matchCount >= 5 && repMatch >= 100 && milliBad < 50)));
        }
    else
        {
        passIt = (milliBad < maxBad &&
                  (matchCount >= minBases ||
                   (matchCount >= minBases/2 && matchCount + repMatch >= 2*minBases) ||
                   (repMatch >= 4*minBases && milliBad < (maxBad/2))));
        }
    if (!passIt)
        return;

    if (isRc)
        {
        /* Report needle coordinates relative to the other strand. */
        int temp;
        int oSize = otherSeq->size;
        temp = nStart;
        nStart = oSize - nEnd;
        nEnd = oSize - temp;
        }
    fprintf(out, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t"
                 "%c\t"
                 "%s\t%d\t%d\t%d\t"
                 "%s\t%d\t%d\t%d\t%d\t",
            matchCount, mismatchCount, repMatch, countNs,
            nInsertCount, nInsertBaseCount, hInsertCount, hInsertBaseCount,
            (isRc ? '-' : '+'),
            otherSeq->name, otherSeq->size, nStart, nEnd,
            genoSeq->name, genoSeq->size, hStart, hEnd,
            ffAliCount(left));

    /* The three block lists print pointer differences (ptrdiff_t); these
     * must be converted before being passed to a printf conversion. */
    for (ff = left; ff != NULL; ff = ff->right)
        fprintf(out, "%ld,", (long)(ff->nEnd - ff->nStart));
    fprintf(out, "\t");
    for (ff = left; ff != NULL; ff = ff->right)
        fprintf(out, "%ld,", (long)(ff->nStart - needle));
    fprintf(out, "\t");
    for (ff = left; ff != NULL; ff = ff->right)
        fprintf(out, "%ld,", (long)(ff->hStart - hay));
    fprintf(out, "\n");
    if (ferror(out))
        {
        perror("");
        errAbort("Write error to .psl");
        }
}
static void savePslx(char *chromName, int chromSize, int chromOffset,
        struct ffAli *ali, struct dnaSeq *tSeq, struct dnaSeq *qSeq,
        boolean isRc, enum ffStringency stringency, int minMatch, FILE *f,
        struct hash *t3Hash, boolean reportTargetStrand, boolean targetIsRc,
        struct hash *maskHash, int minIdentity, boolean qIsProt, boolean tIsProt,
        boolean saveSeq)
/* Analyse one alignment and, if its identity score reaches minIdentity,
 * write it out to file in psl format (or pslX format, which appends the
 * block sequences, if saveSeq is TRUE).
 *
 * Target coordinates are mapped through trans3GenoPos and shifted by
 * chromOffset.  When maskHash is given, matching bases whose bit is set in
 * the mask found under tSeq->name are counted as repeat matches.  The
 * minMatch, qIsProt and tIsProt arguments are not used in this function. */
{
    /* This function was taken from psLayout and extended slightly to cope
     * with protein as well as DNA alignments. */
    struct ffAli *block, *following;
    struct ffAli *lastBlock = ffRightmost(ali);
    DNA *queryBase = qSeq->dna;
    DNA *targetBase = tSeq->dna;
    int qStart = ali->nStart - queryBase;
    int qEnd = lastBlock->nEnd - queryBase;
    int tStart, tEnd;
    int qGapBases = 0, qGapCount = 0;
    int tGapBases = 0, tGapCount = 0;
    int matches = 0, mismatches = 0, repMatches = 0, nCount = 0;
    int gaps, id;
    struct trans3 *t3List = NULL;
    Bits *maskBits = NULL;

    if (maskHash != NULL)
        maskBits = hashMustFindVal(maskHash, tSeq->name);
    if (t3Hash != NULL)
        t3List = hashMustFindVal(t3Hash, tSeq->name);
    tStart = trans3GenoPos(ali->hStart, tSeq, t3List, FALSE) + chromOffset;
    tEnd = trans3GenoPos(lastBlock->hEnd, tSeq, t3List, TRUE) + chromOffset;

    /* Count up matches, mismatches, inserts, etc. */
    for (block = ali; block != NULL; block = following)
        {
        DNA *qp = block->nStart;
        DNA *tp = block->hStart;
        int len = block->nEnd - block->nStart;
        int i;

        following = block->right;
        for (i = 0; i < len; ++i)
            {
            DNA q = qp[i];
            DNA t = tp[i];

            if (q == 'n' || t == 'n')
                {
                ++nCount;
                continue;
                }
            if (q != t)
                {
                ++mismatches;
                continue;
                }
            if (maskBits != NULL)
                {
                int seqOff = tp + i - targetBase;
                if (bitReadOne(maskBits, seqOff))
                    ++repMatches;
                else
                    ++matches;
                }
            else
                ++matches;
            }

        if (following != NULL)
            {
            int nextTStart = trans3GenoPos(following->hStart, tSeq, t3List, FALSE) + chromOffset;
            int thisTEnd = trans3GenoPos(block->hEnd, tSeq, t3List, TRUE) + chromOffset;
            int tGap = nextTStart - thisTEnd;
            int qGap = following->nStart - block->nEnd;

            if (qGap != 0)
                {
                ++qGapCount;
                qGapBases += qGap;
                }
            if (tGap != 0)
                {
                ++tGapCount;
                tGapBases += tGap;
                }
            }
        }

    /* See if it looks good enough to output, and output.  (The minMatch
     * score test was moved to a higher level.)  Target-side gaps do not
     * count against identity at ffCdna stringency. */
    gaps = qGapCount;
    if (stringency != ffCdna)
        gaps += tGapCount;
    id = roundingScale(1000, matches + repMatches - 2*gaps,
                       matches + repMatches + mismatches);
    if (id < minIdentity)
        return;

    if (isRc)
        {
        int qSize = qSeq->size;
        int oldStart = qStart;
        qStart = qSize - qEnd;
        qEnd = qSize - oldStart;
        }
    if (targetIsRc)
        {
        int oldStart = tStart;
        tStart = chromSize - tEnd;
        tEnd = chromSize - oldStart;
        }

    fprintf(f, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c",
            matches, mismatches, repMatches, nCount,
            qGapCount, qGapBases, tGapCount, tGapBases,
            (isRc ? '-' : '+'));
    if (reportTargetStrand)
        fprintf(f, "%c", (targetIsRc ? '-' : '+'));
    fprintf(f, "\t%s\t%d\t%d\t%d\t"
               "%s\t%d\t%d\t%d\t%d\t",
            qSeq->name, qSeq->size, qStart, qEnd,
            chromName, chromSize, tStart, tEnd,
            ffAliCount(ali));

    /* Block sizes, query starts, target starts. */
    for (block = ali; block != NULL; block = block->right)
        fprintf(f, "%ld,", (long)(block->nEnd - block->nStart));
    fprintf(f, "\t");
    for (block = ali; block != NULL; block = block->right)
        fprintf(f, "%ld,", (long)(block->nStart - queryBase));
    fprintf(f, "\t");
    for (block = ali; block != NULL; block = block->right)
        fprintf(f, "%d,", trans3GenoPos(block->hStart, tSeq, t3List, FALSE) + chromOffset);

    if (saveSeq)
        {
        /* pslX: append the query and then the target sequence of each block. */
        fputc('\t', f);
        for (block = ali; block != NULL; block = block->right)
            {
            mustWrite(f, block->nStart, block->nEnd - block->nStart);
            fputc(',', f);
            }
        fputc('\t', f);
        for (block = ali; block != NULL; block = block->right)
            {
            mustWrite(f, block->hStart, block->hEnd - block->hStart);
            fputc(',', f);
            }
        }
    fprintf(f, "\n");
    if (ferror(f))
        {
        perror("");
        errAbort("Write error to .psl");
        }
}