struct trans3 *seqListToTrans3List(struct dnaSeq *seqList, aaSeq *transLists[3], struct hash **retHash)
/* Make a trans3 for every sequence in seqList and return them as a list.
 * Each trans3 is also stored in a newly created hash under its name (via
 * hashAddUnique), and that hash is handed back through *retHash.  The
 * per-frame translations are pushed onto transLists[0..2]; this routine
 * does not initialize transLists itself, it only adds to and then
 * reverses whatever lists are passed in. */
{
struct hash *nameHash = newHash(0);
struct trans3 *resultList = NULL;
struct dnaSeq *cur = seqList;
int f;

while (cur != NULL)
    {
    struct trans3 *item = trans3New(cur);
    hashAddUnique(nameHash, item->name, item);
    slAddHead(&resultList, item);
    for (f = 0; f < 3; ++f)
        {
        slAddHead(&transLists[f], item->trans[f]);
        }
    cur = cur->next;
    }

/* Everything above was pushed at the head, so flip the lists around. */
slReverse(&resultList);
for (f = 0; f < 3; ++f)
    {
    slReverse(&transLists[f]);
    }

*retHash = nameHash;
return resultList;
}
static struct ssBundle *gfTransTransFindBundles(struct genoFind *gfs[3], struct dnaSeq *qSeq, struct hash *t3Hash, boolean isRc, int minMatch, boolean isRna)
/* Translate qSeq in three frames, look for clumps against the three
 * translated indexes in gfs, and return a list of stitched ssBundles,
 * one for each bundled target range.  Stitching is done with ffCdna
 * stringency when isRna is set and ffLoose otherwise.  The isRc
 * parameter is accepted but not consulted in this routine. */
{
struct trans3 *qTrans = trans3New(qSeq);
struct gfClump *clumps[3][3];
struct gfRange *rangeList = NULL, *range;
int tileSize = gfs[0]->tileSize;
struct ssBundle *bunList = NULL;
int hitCount;	/* Out-slot for gfTransTransFindClumps; not read afterwards. */
struct lm *lm = lmInit(0);
enum ffStringency stringency;
int qFrame, tFrame;

if (isRna)
    stringency = ffCdna;
else
    stringency = ffLoose;

gfTransTransFindClumps(gfs, qTrans->trans, clumps, lm, &hitCount);

/* Turn every clump in every query/target frame pairing into ranges,
 * convert those ranges with untranslateRangeList, and collect them
 * all on one list. */
for (qFrame = 0; qFrame < 3; ++qFrame)
    {
    for (tFrame = 0; tFrame < 3; ++tFrame)
        {
        struct gfClump *clump = clumps[qFrame][tFrame];
        while (clump != NULL)
            {
            struct gfRange *hspRanges = NULL;
            clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame, NULL, &hspRanges, TRUE, FALSE);
            untranslateRangeList(hspRanges, qFrame, tFrame, t3Hash, NULL, 0);
            rangeList = slCat(hspRanges, rangeList);
            clump = clump->next;
            }
        }
    }

slSort(&rangeList, gfRangeCmpTarget);
rangeList = gfRangesBundle(rangeList, 2000);

/* One stitched bundle per bundled range. */
for (range = rangeList; range != NULL; range = range->next)
    {
    bioSeq *genoSeq = range->tSeq;
    struct ssBundle *bundle;
    AllocVar(bundle);
    bundle->qSeq = qSeq;
    bundle->genoSeq = genoSeq;
    bundle->ffList = gfRangesToFfItem(range->components, qSeq);
    ssStitch(bundle, stringency, minMatch, ssAliCount);
    slAddHead(&bunList, bundle);
    }

/* Release working storage; only the bundle list outlives this call. */
for (qFrame = 0; qFrame < 3; ++qFrame)
    {
    for (tFrame = 0; tFrame < 3; ++tFrame)
        {
        gfClumpFreeList(&clumps[qFrame][tFrame]);
        }
    }
gfRangeFreeList(&rangeList);
trans3Free(&qTrans);
lmCleanup(&lm);

slReverse(&bunList);
return bunList;
}
static void loadHashT3Ranges(struct gfRange *rangeList, char *tSeqDir, struct hash *tFileCache, int qSeqSize, boolean isRc, struct hash **retT3Hash, struct dnaSeq **retSeqList, struct slRef **retT3RefList)
/* For each range in rangeList load the target DNA with
 * gfiExpandAndLoadCached, rename it to the range's tName, and build a
 * trans3 from it.  Returns three things through the ret pointers:
 *   *retT3Hash    - hash keyed by target name.  When several ranges share
 *                   a name only the first trans3 is stored in the hash;
 *                   later ones are appended to the list hanging off it.
 *   *retSeqList   - list of the loaded DNA sequences.
 *   *retT3RefList - slRef list with a reference to every trans3 made. */
{
struct hash *t3Hash = newHash(10);
struct dnaSeq *loadedSeqs = NULL;
struct slRef *refs = NULL;
struct gfRange *r;

for (r = rangeList; r != NULL; r = r->next)
    {
    struct trans3 *newT3, *priorT3;
    struct dnaSeq *seq = gfiExpandAndLoadCached(r, tFileCache, tSeqDir, qSeqSize*3, &r->tTotalSize, TRUE, isRc, usualExpansion);

    slAddHead(&loadedSeqs, seq);

    /* Replace whatever name the loader assigned with the range's target name. */
    freez(&seq->name);
    seq->name = cloneString(r->tName);

    newT3 = trans3New(seq);
    refAdd(&refs, newT3);
    newT3->start = r->tStart;
    newT3->end = r->tEnd;
    newT3->nibSize = r->tTotalSize;
    newT3->isRc = isRc;

    priorT3 = hashFindVal(t3Hash, r->tName);
    if (priorT3 == NULL)
        {
        hashAdd(t3Hash, r->tName, newT3);
        }
    else
        {
        /* Name already present: chain onto the trans3 that is in the hash. */
        slAddTail(&priorT3->next, newT3);
        }
    }

*retT3Hash = t3Hash;
*retSeqList = loadedSeqs;
*retT3RefList = refs;
}
void gfAlignTransTrans(int *pConn, char *tSeqDir, struct dnaSeq *qSeq, boolean qIsRc, int minMatch, struct hash *tFileCache, struct gfOutput *out, boolean isRna)
/* Search the indexed translated genome on the server with a DNA sequence
 * that is translated in three frames.  The connection is closed and *pConn
 * set to -1 once the clumps have been fetched.  Then, for each target
 * strand, the homologous pieces of genome are loaded locally, the clumps
 * are extended into ranges, and each bundled range is stitched and passed
 * to saveAlignments together with 'out'.  Stitching uses ffCdna stringency
 * when isRna is set and ffLoose otherwise. */
{
struct gfClump *clumps[2][3][3];
char targetName[PATH_LEN];
struct gfSeqSource *ssList = NULL, *source;
struct lm *lm = lmInit(0);
int tileSize;
struct gfRange *rangeList = NULL;
struct trans3 *qTrans = trans3New(qSeq);
struct slRef *t3RefList = NULL;
struct hash *t3Hash = NULL;
struct dnaSeq *tSeqList = NULL;
enum ffStringency stringency;
int qFrame, tFrame, tIsRc;

if (isRna)
    stringency = ffCdna;
else
    stringency = ffLoose;

/* Ask the server for clumps, then drop the connection. */
gfQuerySeqTransTrans(*pConn, qSeq, clumps, lm, &ssList, &tileSize);
close(*pConn);
*pConn = -1;

for (tIsRc = 0; tIsRc <= 1; ++tIsRc)
    {
    struct gfRange *range;
    struct slRef *ref;

    /* First pass: work out which stretches of target need loading. */
    for (qFrame = 0; qFrame < 3; ++qFrame)
        {
        for (tFrame = 0; tFrame < 3; ++tFrame)
            {
            struct gfRange *frameRanges = seqClumpToRangeList(clumps[tIsRc][qFrame][tFrame], tFrame);
            rangeList = slCat(rangeList, frameRanges);
            }
        }
    rangeCoorTimes3(rangeList);
    slSort(&rangeList, gfRangeCmpTarget);
    rangeList = gfRangesBundle(rangeList, ffIntronMax);
    loadHashT3Ranges(rangeList, tSeqDir, tFileCache, qSeq->size/3, tIsRc, &t3Hash, &tSeqList, &t3RefList);

    /* That range list was only a rough guide for loading DNA; discard it. */
    gfRangeFreeList(&rangeList);

    /* Second pass: point each clump's sequence source at the loaded
     * translation, then build a fresh range list by extending the
     * hits in the clumps. */
    for (qFrame = 0; qFrame < 3; ++qFrame)
        {
        for (tFrame = 0; tFrame < 3; ++tFrame)
            {
            struct gfClump *clump = clumps[tIsRc][qFrame][tFrame];
            while (clump != NULL)
                {
                struct gfSeqSource *clumpSource = clump->target;
                struct gfRange *hspRanges = NULL;
                struct trans3 *loaded = trans3Find(t3Hash, clumpTargetName(clump), clump->tStart*3, clump->tEnd*3);

                clumpSource->seq = loaded->trans[tFrame];
                clumpSource->start = loaded->start/3;
                clumpSource->end = loaded->end/3;
                clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame, loaded, &hspRanges, TRUE, FALSE);
                untranslateRangeList(hspRanges, qFrame, tFrame, NULL, loaded, loaded->start);
                rangeList = slCat(hspRanges, rangeList);
                clump = clump->next;
                }
            }
        }
    slReverse(&rangeList);
    slSort(&rangeList, gfRangeCmpTarget);
    rangeList = gfRangesBundle(rangeList, ffIntronMax);

    /* Stitch each bundled range and pass the result to saveAlignments. */
    for (range = rangeList; range != NULL; range = range->next)
        {
        struct dnaSeq *genoSeq = range->tSeq;
        struct trans3 *rangeT3;
        struct ssBundle *bundle;

        AllocVar(bundle);
        bundle->qSeq = qSeq;
        bundle->genoSeq = genoSeq;
        bundle->ffList = gfRangesToFfItem(range->components, qSeq);
        ssStitch(bundle, stringency, minMatch, ssAliCount);
        getTargetName(range->tName, out->includeTargetFile, targetName);
        rangeT3 = range->t3;
        saveAlignments(targetName, rangeT3->nibSize, rangeT3->start, bundle, NULL, qIsRc, tIsRc, stringency, minMatch, out);
        ssBundleFree(&bundle);
        }

    /* Per-strand cleanup. */
    gfRangeFreeList(&rangeList);
    freeHash(&t3Hash);
    for (ref = t3RefList; ref != NULL; ref = ref->next)
        {
        struct trans3 *doomed = ref->val;
        trans3Free(&doomed);
        }
    slFreeList(&t3RefList);
    freeDnaSeqList(&tSeqList);
    }

/* Final cleanup of everything shared across both strands. */
trans3Free(&qTrans);
for (source = ssList; source != NULL; source = source->next)
    {
    freeMem(source->fileName);
    }
slFreeList(&ssList);
lmCleanup(&lm);
}