static struct ffAli *ffNextBreak(struct ffAli *ff, int maxInsert, bioSeq *tSeq,
        struct trans3 *t3List)
/* Walk rightward from ff and return the first block that begins a new run.
 * A run ends between two neighbouring blocks when
 *   - both the h-side gap and the n-side gap are non-zero, or
 *   - either gap is negative, or
 *   - either gap is larger than maxInsert.
 * The h-side gap is measured on the positions returned by trans3GenoPos.
 * Returns NULL when the list runs out before such a break is found, which
 * is a legitimate result.  ff itself must not be NULL. */
{
struct ffAli *cur = ff;
struct ffAli *next = cur->right;

while (next != NULL)
    {
    int nextHStart = trans3GenoPos(next->hStart, tSeq, t3List, FALSE);
    int curHEnd = trans3GenoPos(cur->hEnd, tSeq, t3List, TRUE);
    int hGap = nextHStart - curHEnd;
    int nGap = next->nStart - cur->nEnd;

    if ((hGap != 0 && nGap != 0)
        || hGap < 0 || nGap < 0
        || hGap > maxInsert || nGap > maxInsert)
        break;

    /* Gap is acceptable: slide the window one block to the right. */
    cur = next;
    next = cur->right;
    }
return next;
}
struct ffAli *cutAtBigIntrons(struct ffAli *ffList, int maxIntron, int *pScore,
        enum ffStringency stringency, boolean isProt, bioSeq *tSeq,
        struct trans3 *t3List, struct ffAli **returnLeftovers)
/* Return ffList up to the first intron that's too big.
 * Put the rest of the blocks back onto the leftovers list.
 *
 * ffList          - leftmost block of the alignment; NULL (empty list) is
 *                   accepted and returned unchanged.
 * maxIntron       - largest h-side distance, as measured via trans3GenoPos,
 *                   allowed between two neighbouring blocks.
 * pScore          - rewritten with the score of the kept part, but only
 *                   when a cut is actually made.
 * stringency      - passed through to the scoring function.
 * isProt          - TRUE selects ffScoreProtein, FALSE selects ffScore.
 * tSeq, t3List    - passed to trans3GenoPos when measuring distances.
 * returnLeftovers - list that receives the blocks right of the cut.
 *
 * Returns ffList (the kept, possibly shortened, list). */
{
struct ffAli *prevFf, *ff, *cutFf = NULL;

/* An empty list has nothing to cut; leave before touching ffList->right. */
if (ffList == NULL)
    return NULL;

/* Look for the first pair of neighbours separated by more than maxIntron. */
prevFf = ffList;
for (ff = prevFf->right; ff != NULL; ff = ff->right)
    {
    int nhStart = trans3GenoPos(ff->hStart, tSeq, t3List, FALSE);
    int ohEnd = trans3GenoPos(prevFf->hEnd, tSeq, t3List, TRUE);
    int dt = nhStart - ohEnd;
    if (dt > maxIntron)
        {
        cutFf = prevFf;
        break;
        }
    prevFf = ff;
    }

if (cutFf != NULL)
    {
    /* Detach everything right of cutFf and hand it to the leftovers list. */
    ff = cutFf->right;
    cutFf->right = NULL;
    ff->left = NULL;
    ffCat(returnLeftovers, &ff);

    /* The kept list is shorter now, so its score has to be recomputed. */
    if (isProt)
        *pScore = ffScoreProtein(ffList, stringency);
    else
        *pScore = ffScore(ffList, stringency);
    }
return ffList;
}
static int scoreAli(struct ffAli *ali, boolean isProt, enum ffStringency stringency,
        struct dnaSeq *tSeq, struct trans3 *t3List)
/* Score alignment: sum the match score of every block and subtract a gap
 * penalty for every pair of neighbouring blocks.  isProt picks
 * aaScoreMatch over dnaScoreMatch for the per-block score.  The h-side gap
 * is measured on the positions returned by trans3GenoPos. */
{
int (*blockScorer)(char *a, char *b, int size);
struct ffAli *block = ali;
int total = 0;

if (isProt)
    blockScorer = aaScoreMatch;
else
    blockScorer = dnaScoreMatch;

while (block != NULL)
    {
    struct ffAli *following = block->right;

    total += blockScorer(block->nStart, block->hStart, block->nEnd - block->nStart);
    if (following != NULL)
        {
        int nextHStart = trans3GenoPos(following->hStart, tSeq, t3List, FALSE);
        int thisHEnd = trans3GenoPos(block->hEnd, tSeq, t3List, TRUE);
        int hGap = nextHStart - thisHEnd;
        int nGap = following->nStart - block->nEnd;

        total -= ffCalcGapPenalty(hGap, nGap, stringency);
        }
    block = following;
    }
return total;
}
void dumpFf(struct ffAli *left, bioSeq *qSeq, bioSeq *tSeq, struct trans3 *t3List)
/* Print one "(hStart - hEnd)[nStart-nEnd] " entry to stdout for every
 * block from left rightward, followed by a newline.  The h positions come
 * from trans3GenoPos; the n positions are offsets from qSeq->dna. */
{
struct ffAli *block = left;

while (block != NULL)
    {
    int hStart = trans3GenoPos(block->hStart, tSeq, t3List, FALSE);
    int hEnd = trans3GenoPos(block->hEnd, tSeq, t3List, TRUE);
    long nStart = (long)(block->nStart - qSeq->dna);
    long nEnd = (long)(block->nEnd - qSeq->dna);

    printf("(%d - %d)[%ld-%ld] ", hStart, hEnd, nStart, nEnd);
    block = block->right;
    }
printf("\n");
}
int trans3Frame(char *pt, struct trans3 *t3List)
/* Figure out which frame pt is in.  Returns 0 when there is no t3List
 * (no frame); otherwise 1 plus the trans3GenoPos position of pt modulo 3. */
{
int genoPos;

if (t3List == NULL)
    return 0;
genoPos = trans3GenoPos(pt, NULL, t3List, FALSE);
return 1 + genoPos % 3;
}
static void savePslx(char *chromName, int chromSize, int chromOffset,
        struct ffAli *ali, struct dnaSeq *tSeq, struct dnaSeq *qSeq,
        boolean isRc, enum ffStringency stringency, int minMatch, FILE *f,
        struct hash *t3Hash, boolean reportTargetStrand, boolean targetIsRc,
        struct hash *maskHash, int minIdentity, boolean qIsProt, boolean tIsProt,
        boolean saveSeq)
/* Analyse one alignment and, if its identity is at least minIdentity,
 * write it to f as one line in psl format (or pslX format, with the block
 * sequences appended, if saveSeq is TRUE).
 *
 * Adapted from psLayout and extended to cope with protein as well as DNA
 * alignments.  minMatch, qIsProt and tIsProt are not consulted here; the
 * minimum-score check was moved to a higher level. */
{
struct ffAli *lastBlock = ffRightmost(ali);
struct ffAli *block;
DNA *needle = qSeq->dna;
DNA *hay = tSeq->dna;
int qStart = ali->nStart - needle;
int qEnd = lastBlock->nEnd - needle;
int tStart, tEnd;
int qInsertCount = 0, qInsertBases = 0;
int tInsertCount = 0, tInsertBases = 0;
int matchCount = 0, mismatchCount = 0, repMatch = 0, countNs = 0;
int gapCount, identity;
struct trans3 *t3List = NULL;
Bits *maskBits = NULL;

if (maskHash != NULL)
    maskBits = hashMustFindVal(maskHash, tSeq->name);
if (t3Hash != NULL)
    t3List = hashMustFindVal(t3Hash, tSeq->name);
tStart = trans3GenoPos(ali->hStart, tSeq, t3List, FALSE) + chromOffset;
tEnd = trans3GenoPos(lastBlock->hEnd, tSeq, t3List, TRUE) + chromOffset;

/* Count up matches, mismatches, inserts, etc. */
for (block = ali; block != NULL; block = block->right)
    {
    struct ffAli *following = block->right;
    DNA *qBases = block->nStart;
    DNA *tBases = block->hStart;
    int size = block->nEnd - block->nStart;
    int i;

    for (i = 0; i < size; ++i)
        {
        DNA qBase = qBases[i];
        DNA tBase = tBases[i];

        if (qBase == 'n' || tBase == 'n')
            {
            ++countNs;
            continue;
            }
        if (qBase != tBase)
            {
            ++mismatchCount;
            continue;
            }
        /* Identical bases: a set mask bit makes it a repeat match. */
        if (maskBits != NULL)
            {
            int seqOff = tBases + i - hay;
            if (bitReadOne(maskBits, seqOff))
                {
                ++repMatch;
                continue;
                }
            }
        ++matchCount;
        }

    if (following != NULL)
        {
        int nextTStart = trans3GenoPos(following->hStart, tSeq, t3List, FALSE) + chromOffset;
        int thisTEnd = trans3GenoPos(block->hEnd, tSeq, t3List, TRUE) + chromOffset;
        int tGap = nextTStart - thisTEnd;
        int qGap = following->nStart - block->nEnd;

        if (qGap != 0)
            {
            ++qInsertCount;
            qInsertBases += qGap;
            }
        if (tGap != 0)
            {
            ++tInsertCount;
            tInsertBases += tGap;
            }
        }
    }

/* See if it looks good enough to output.  With ffCdna stringency the
 * h-side inserts are not counted as gaps.
 * NOTE(review): the last argument below is zero when no base was counted
 * as match, repeat match or mismatch - confirm roundingScale copes. */
gapCount = qInsertCount + (stringency == ffCdna ? 0 : tInsertCount);
identity = roundingScale(1000, matchCount + repMatch - 2*gapCount,
        matchCount + repMatch + mismatchCount);
if (identity < minIdentity)
    return;

/* Flip coordinates for reverse-complemented query and/or target. */
if (isRc)
    {
    int fwdStart = qStart;
    qStart = qSeq->size - qEnd;
    qEnd = qSeq->size - fwdStart;
    }
if (targetIsRc)
    {
    int fwdStart = tStart;
    tStart = chromSize - tEnd;
    tEnd = chromSize - fwdStart;
    }

/* Counters and strand(s). */
fprintf(f, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c",
        matchCount, mismatchCount, repMatch, countNs,
        qInsertCount, qInsertBases, tInsertCount, tInsertBases,
        (isRc ? '-' : '+'));
if (reportTargetStrand)
    fprintf(f, "%c", (targetIsRc ? '-' : '+'));

/* Query and target names, sizes, ranges and the block count. */
fprintf(f, "\t%s\t%d\t%d\t%d\t"
        "%s\t%d\t%d\t%d\t%d\t",
        qSeq->name, qSeq->size, qStart, qEnd,
        chromName, chromSize, tStart, tEnd, ffAliCount(ali));

/* Comma-terminated lists: block sizes, n-side starts, h-side starts. */
for (block = ali; block != NULL; block = block->right)
    fprintf(f, "%ld,", (long)(block->nEnd - block->nStart));
fprintf(f, "\t");
for (block = ali; block != NULL; block = block->right)
    fprintf(f, "%ld,", (long)(block->nStart - needle));
fprintf(f, "\t");
for (block = ali; block != NULL; block = block->right)
    fprintf(f, "%d,", trans3GenoPos(block->hStart, tSeq, t3List, FALSE) + chromOffset);

/* pslX only: the raw n-side and h-side sequence of every block. */
if (saveSeq)
    {
    fputc('\t', f);
    for (block = ali; block != NULL; block = block->right)
        {
        mustWrite(f, block->nStart, block->nEnd - block->nStart);
        fputc(',', f);
        }
    fputc('\t', f);
    for (block = ali; block != NULL; block = block->right)
        {
        mustWrite(f, block->hStart, block->hEnd - block->hStart);
        fputc(',', f);
        }
    }
fprintf(f, "\n");
if (ferror(f))
    {
    perror("");
    errAbort("Write error to .psl");
    }
}
static void saveAxtBundle(char *chromName, int chromSize, int chromOffset,
        struct ffAli *ali, struct dnaSeq *tSeq, struct hash *t3Hash,
        struct dnaSeq *qSeq, boolean qIsRc, boolean tIsRc,
        enum ffStringency stringency, int minMatch, struct gfOutput *out)
/* Save alignment to axtBundle.  The block list is split into runs with
 * ffNextBreak (maxInsert of 8); each run becomes one axt whose symbol
 * strings are the block sequences with '-' padding opposite every gap.
 * The finished bundle is pushed onto the bundleList of out->data.
 * stringency and minMatch are not consulted here. */
{
struct axtData *ad = out->data;
struct dyString *q = newDyString(1024), *t = newDyString(1024);
struct axtBundle *bundle;
struct trans3 *t3List = NULL;
struct ffAli *runStart, *runEnd;

if (t3Hash != NULL)
    t3List = hashMustFindVal(t3Hash, tSeq->name);
AllocVar(bundle);
bundle->tSize = chromSize;
bundle->qSize = qSeq->size;

runStart = ali;
while (runStart != NULL)
    {
    struct ffAli *block;
    struct ffAli *lastBlock = NULL;   /* Last block in this run. */
    struct axt *axt;

    runEnd = ffNextBreak(runStart, 8, tSeq, t3List);
    dyStringClear(q);
    dyStringClear(t);

    for (block = runStart; block != runEnd; block = block->right)
        {
        struct ffAli *neighbor = block->right;

        dyStringAppendN(q, block->nStart, block->nEnd - block->nStart);
        dyStringAppendN(t, block->hStart, block->hEnd - block->hStart);

        if (neighbor != runEnd)
            {
            int nGap = neighbor->nStart - block->nEnd;
            int nextHStart = trans3GenoPos(neighbor->hStart, tSeq, t3List, FALSE) + chromOffset;
            int thisHEnd = trans3GenoPos(block->hEnd, tSeq, t3List, TRUE) + chromOffset;
            int hGap = nextHStart - thisHEnd;
            int gap = max(nGap, hGap);

            if (nGap < 0 || hGap < 0)
                {
                errAbort("Negative gap size in %s vs %s", tSeq->name, qSeq->name);
                }
            /* The side owning the larger gap contributes its bases;
             * the other side is padded with dashes. */
            if (nGap == gap)
                {
                dyStringAppendN(q, block->nEnd, gap);
                dyStringAppendMultiC(t, '-', gap);
                }
            else
                {
                dyStringAppendN(t, block->hEnd, gap);
                dyStringAppendMultiC(q, '-', gap);
                }
            }
        lastBlock = block;
        }
    assert(t->stringSize == q->stringSize);

    AllocVar(axt);
    axt->qName = cloneString(qSeq->name);
    axt->qStart = runStart->nStart - qSeq->dna;
    axt->qEnd = lastBlock->nEnd - qSeq->dna;
    axt->qStrand = (qIsRc ? '-' : '+');
    axt->tName = cloneString(chromName);
    axt->tStart = trans3GenoPos(runStart->hStart, tSeq, t3List, FALSE) + chromOffset;
    axt->tEnd = trans3GenoPos(lastBlock->hEnd, tSeq, t3List, TRUE) + chromOffset;
    axt->tStrand = (tIsRc ? '-' : '+');
    axt->symCount = t->stringSize;
    axt->qSym = cloneString(q->string);
    axt->tSym = cloneString(t->string);
    axt->frame = trans3Frame(runStart->hStart, t3List);
    if (out->qIsProt)
        axt->score = axtScoreProteinDefault(axt);
    else
        axt->score = axtScoreDnaDefault(axt);
    slAddHead(&bundle->axtList, axt);

    runStart = runEnd;
    }

/* Runs were pushed on the head, so put them back in alignment order. */
slReverse(&bundle->axtList);
dyStringFree(&q);
dyStringFree(&t);
slAddHead(&ad->bundleList, bundle);
}