void gfAlignStrand(int *pConn, char *tSeqDir, struct dnaSeq *seq,
    boolean isRc, int minMatch,
    struct hash *tFileCache, struct gfOutput *out)
/* Query the server on *pConn with one strand of seq to get candidate
 * target ranges, then load each bundled range locally, align it in
 * detail and hand the result to saveAlignments() together with 'out'.
 * The connection is closed and *pConn is set to -1 as soon as the
 * query has been answered. */
{
struct gfRange *bundles = gfQuerySeq(*pConn, seq);
struct gfRange *hit;
char nameBuf[PATH_LEN];

/* The server is not consulted again after the initial query. */
close(*pConn);
*pConn = -1;

slSort(&bundles, gfRangeCmpTarget);
bundles = gfRangesBundle(bundles, ffIntronMax);

hit = bundles;
while (hit != NULL)
    {
    struct dnaSeq *tSeq;
    struct ssBundle *stitch;

    getTargetName(hit->tName, out->includeTargetFile, nameBuf);
    tSeq = gfiExpandAndLoadCached(hit, tFileCache, tSeqDir,
        seq->size, &hit->tTotalSize, FALSE, FALSE, usualExpansion);

    AllocVar(stitch);
    stitch->qSeq = seq;
    stitch->genoSeq = tSeq;
    alignComponents(hit, stitch, ffCdna);
    ssStitch(stitch, ffCdna, minMatch, ssAliCount);
    saveAlignments(nameBuf, hit->tTotalSize, hit->tStart,
        stitch, NULL, isRc, FALSE, ffCdna, minMatch, out);

    /* The bundle and the locally loaded target are per-range. */
    ssBundleFree(&stitch);
    freeDnaSeq(&tSeq);

    hit = hit->next;
    }
gfRangeFreeList(&bundles);
}
static struct ffAli *foldInExtras(struct dnaSeq *qSeq, struct dnaSeq *tSeq, struct ffAli *ffList, struct ffAli *extraList) /* Integrate extraList into ffList and return result. * Frees bits of extraList that aren't used. */ { if (extraList != NULL) { struct ssBundle *bun; struct ssFfItem *ffi; AllocVar(bun); bun->qSeq = qSeq; bun->genoSeq = tSeq; bun->avoidFuzzyFindKludge = TRUE; AllocVar(ffi); ffi->ff = ffList; slAddHead(&bun->ffList, ffi); AllocVar(ffi); ffi->ff = extraList; slAddHead(&bun->ffList, ffi); ssStitch(bun, ffCdna, 16, 1); if (bun->ffList != NULL) { ffList = bun->ffList->ff; bun->ffList->ff = NULL; } else { ffList = NULL; } ssBundleFree(&bun); } return ffList; }
void ssBundleFreeList(struct ssBundle **pList)
/* Free every ssBundle on the list at *pList and set *pList to NULL. */
{
struct ssBundle *cur = *pList;

while (cur != NULL)
    {
    /* Grab the successor first; ssBundleFree() is given the address of
     * cur, so cur cannot be used to reach the next node afterwards. */
    struct ssBundle *rest = cur->next;
    ssBundleFree(&cur);
    cur = rest;
    }
*pList = NULL;
}
void gfFindAlignAaTrans(struct genoFind *gfs[3], aaSeq *qSeq, struct hash *t3Hash,
    boolean tIsRc, int minMatch, struct gfOutput *out)
/* Look for alignments of protein qSeq in three translated reading frames.
 * Clumps found in each frame are extended into ranges, the ranges are
 * bundled, and each bundle is stitched and passed to saveAlignments()
 * together with 'out'.  The trans3 for each target is looked up by
 * sequence name in t3Hash via hashMustFindVal(). */
{
struct gfClump *frameClumps[3];
struct gfRange *bundles = NULL, *hit;
int seedSize = gfs[0]->tileSize;
int hitCount;
struct lm *lm = lmInit(0);
int f;

gfTransFindClumps(gfs, qSeq, frameClumps, lm, &hitCount);

/* Turn every clump of every frame into ranges on one shared list. */
for (f = 0; f < 3; ++f)
    {
    struct gfClump *c = frameClumps[f];
    while (c != NULL)
        {
        clumpToHspRange(c, qSeq, seedSize, f, NULL, &bundles, TRUE, FALSE);
        c = c->next;
        }
    }
slReverse(&bundles);
slSort(&bundles, gfRangeCmpTarget);
bundles = gfRangesBundle(bundles, ffIntronMax/3);

for (hit = bundles; hit != NULL; hit = hit->next)
    {
    aaSeq *tSeq = hit->tSeq;
    struct trans3 *t3 = hashMustFindVal(t3Hash, tSeq->name);
    struct ssBundle *stitch;

    AllocVar(stitch);
    stitch->qSeq = qSeq;
    stitch->genoSeq = tSeq;
    stitch->ffList = gfRangesToFfItem(hit->components, qSeq);
    stitch->isProt = TRUE;
    stitch->t3List = t3;
    ssStitch(stitch, ffCdna, minMatch, ssAliCount);
    saveAlignments(tSeq->name, t3->seq->size, 0,
        stitch, t3Hash, FALSE, tIsRc, ffCdna, minMatch, out);
    ssBundleFree(&stitch);
    }

gfRangeFreeList(&bundles);
for (f = 0; f < 3; ++f)
    gfClumpFreeList(&frameClumps[f]);
lmCleanup(&lm);
}
static void gfAlignSomeClumps(struct genoFind *gf, struct gfClump *clumpList,
    bioSeq *seq, boolean isRc, int minMatch,
    struct gfOutput *out, boolean isProt, enum ffStringency stringency)
/* Turn a list of gfClumps into alignments: extend each clump into ranges,
 * bundle the ranges, stitch every bundle at the requested stringency and
 * pass it to saveAlignments() together with 'out'.  For protein input
 * the bundling distance is ffIntronMax/3 rather than ffIntronMax. */
{
struct gfRange *bundles = NULL, *hit;
struct gfClump *c;
int maxGap = (isProt ? ffIntronMax/3 : ffIntronMax);

c = clumpList;
while (c != NULL)
    {
    clumpToHspRange(c, seq, gf->tileSize, 0, NULL, &bundles, isProt, FALSE);
    c = c->next;
    }
slReverse(&bundles);
slSort(&bundles, gfRangeCmpTarget);
bundles = gfRangesBundle(bundles, maxGap);

for (hit = bundles; hit != NULL; hit = hit->next)
    {
    bioSeq *tSeq = hit->tSeq;
    struct ssBundle *stitch;

    AllocVar(stitch);
    stitch->qSeq = seq;
    stitch->genoSeq = tSeq;
    stitch->ffList = gfRangesToFfItem(hit->components, seq);
    stitch->isProt = isProt;
    ssStitch(stitch, stringency, minMatch, ssAliCount);
    saveAlignments(tSeq->name, tSeq->size, 0,
        stitch, NULL, isRc, FALSE, stringency, minMatch, out);
    ssBundleFree(&stitch);
    }
gfRangeFreeList(&bundles);
}
void gfAlignTransTrans(int *pConn, char *tSeqDir, struct dnaSeq *qSeq,
    boolean qIsRc, int minMatch, struct hash *tFileCache,
    struct gfOutput *out, boolean isRna)
/* Search indexed translated genome on server with a DNA sequence.  Translate
 * this sequence in three frames.  Load homologous bits of genome locally
 * and do detailed alignment.  Each alignment that is found is passed to
 * saveAlignments() together with 'out'.
 *   pConn  - open server connection; closed and set to -1 once the query
 *            has been answered.
 *   qIsRc  - forwarded unchanged to saveAlignments().
 *   isRna  - selects ffCdna stringency when TRUE, ffLoose otherwise.
 * The clump lists returned by the query are freed before returning, in
 * the same way gfAlignTrans() frees its clumps. */
{
struct gfClump *clumps[2][3][3], *clump;
char targetName[PATH_LEN];
int qFrame, tFrame, tIsRc;
struct gfSeqSource *ssList = NULL, *ss;
struct lm *lm = lmInit(0);
int tileSize;
struct gfRange *rangeList = NULL, *rl, *range;
struct trans3 *qTrans = trans3New(qSeq), *t3;
struct slRef *t3RefList = NULL, *t3Ref;
struct hash *t3Hash = NULL;
struct dnaSeq *tSeqList = NULL;
enum ffStringency stringency = (isRna ? ffCdna : ffLoose);

/* Query server for clumps. */
gfQuerySeqTransTrans(*pConn, qSeq, clumps, lm, &ssList, &tileSize);
close(*pConn);
*pConn = -1;

for (tIsRc = 0; tIsRc <= 1; ++tIsRc)
    {
    /* Figure out which ranges need to be loaded and load them. */
    for (qFrame = 0; qFrame < 3; ++qFrame)
        {
        for (tFrame = 0; tFrame < 3; ++tFrame)
            {
            rl = seqClumpToRangeList(clumps[tIsRc][qFrame][tFrame], tFrame);
            rangeList = slCat(rangeList, rl);
            }
        }
    rangeCoorTimes3(rangeList);
    slSort(&rangeList, gfRangeCmpTarget);
    rangeList = gfRangesBundle(rangeList, ffIntronMax);
    loadHashT3Ranges(rangeList, tSeqDir, tFileCache,
        qSeq->size/3, tIsRc, &t3Hash, &tSeqList, &t3RefList);

    /* The old range list was not very precise - it was just to get
     * the DNA loaded. */
    gfRangeFreeList(&rangeList);

    /* Patch up clump list and associated sequence source to refer
     * to bits of genome loaded into memory.  Create new range list
     * by extending hits in clumps. */
    for (qFrame = 0; qFrame < 3; ++qFrame)
        {
        for (tFrame = 0; tFrame < 3; ++tFrame)
            {
            for (clump = clumps[tIsRc][qFrame][tFrame]; clump != NULL; clump = clump->next)
                {
                struct gfSeqSource *source = clump->target;
                struct gfRange *rangeSet = NULL;
                t3 = trans3Find(t3Hash, clumpTargetName(clump),
                    clump->tStart*3, clump->tEnd*3);
                source->seq = t3->trans[tFrame];
                source->start = t3->start/3;
                source->end = t3->end/3;
                clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame,
                    t3, &rangeSet, TRUE, FALSE);
                untranslateRangeList(rangeSet, qFrame, tFrame, NULL, t3, t3->start);
                rangeList = slCat(rangeSet, rangeList);
                }
            }
        }
    slReverse(&rangeList);
    slSort(&rangeList, gfRangeCmpTarget);
    rangeList = gfRangesBundle(rangeList, ffIntronMax);

    /* Do detailed alignment of each of the clustered ranges. */
    for (range = rangeList; range != NULL; range = range->next)
        {
        struct dnaSeq *targetSeq = range->tSeq;
        struct ssBundle *bun;

        AllocVar(bun);
        bun->qSeq = qSeq;
        bun->genoSeq = targetSeq;
        bun->ffList = gfRangesToFfItem(range->components, qSeq);
        ssStitch(bun, stringency, minMatch, ssAliCount);
        getTargetName(range->tName, out->includeTargetFile, targetName);
        t3 = range->t3;
        saveAlignments(targetName, t3->nibSize, t3->start,
            bun, NULL, qIsRc, tIsRc, stringency, minMatch, out);
        ssBundleFree(&bun);
        }

    /* Cleanup for this strand of database. */
    gfRangeFreeList(&rangeList);
    freeHash(&t3Hash);
    for (t3Ref = t3RefList; t3Ref != NULL; t3Ref = t3Ref->next)
        {
        struct trans3 *loaded = t3Ref->val;
        trans3Free(&loaded);
        }
    slFreeList(&t3RefList);
    freeDnaSeqList(&tSeqList);
    }

/* Final cleanup.  The clump lists were previously never released here,
 * unlike in gfAlignTrans(); free them before the sequence sources they
 * point at. */
trans3Free(&qTrans);
for (tIsRc = 0; tIsRc <= 1; ++tIsRc)
    for (qFrame = 0; qFrame < 3; ++qFrame)
        for (tFrame = 0; tFrame < 3; ++tFrame)
            gfClumpFreeList(&clumps[tIsRc][qFrame][tFrame]);
for (ss = ssList; ss != NULL; ss = ss->next)
    freeMem(ss->fileName);
slFreeList(&ssList);
lmCleanup(&lm);
}
void gfAlignTrans(int *pConn, char *tSeqDir, aaSeq *seq, int minMatch,
    struct hash *tFileCache, struct gfOutput *out)
/* Search indexed translated genome on server with an amino acid sequence,
 * then load the homologous parts of the genome locally and align them in
 * detail.  Each alignment found is passed to saveAlignments() together
 * with 'out'.  The connection is closed and *pConn set to -1 once the
 * server has answered the query. */
{
struct gfClump *clumps[2][3];
struct gfRange *rangeList = NULL;
struct dnaSeq *tSeqList = NULL;
struct hash *t3Hash = NULL;
struct slRef *t3RefList = NULL;
struct gfSeqSource *ssList = NULL, *src;
struct lm *lm = lmInit(0);
char targetName[PATH_LEN];
int tileSize;
int frame, strand;

/* Get clumps from server. */
gfQuerySeqTrans(*pConn, seq, clumps, lm, &ssList, &tileSize);
close(*pConn);
*pConn = -1;

for (strand = 0; strand <= 1; ++strand)
    {
    struct gfRange *range;
    struct slRef *ref;

    /* Figure out which parts of sequence we need to load. */
    for (frame = 0; frame < 3; ++frame)
        {
        struct gfRange *frameRanges = seqClumpToRangeList(clumps[strand][frame], frame);
        rangeList = slCat(rangeList, frameRanges);
        }

    /* Convert from amino acid to nucleotide coordinates. */
    rangeCoorTimes3(rangeList);
    slSort(&rangeList, gfRangeCmpTarget);
    rangeList = gfRangesBundle(rangeList, ffIntronMax);
    loadHashT3Ranges(rangeList, tSeqDir, tFileCache,
        seq->size, strand, &t3Hash, &tSeqList, &t3RefList);

    /* The old range list was not very precise - it was just to get
     * the DNA loaded. */
    gfRangeFreeList(&rangeList);

    /* Patch up clump list and associated sequence source to refer
     * to bits of genome loaded into memory.  Create new range list
     * by extending hits in clumps. */
    for (frame = 0; frame < 3; ++frame)
        {
        struct gfClump *clump = clumps[strand][frame];
        while (clump != NULL)
            {
            struct gfSeqSource *clumpSrc = clump->target;
            struct trans3 *loaded = trans3Find(t3Hash, clumpTargetName(clump),
                clump->tStart*3, clump->tEnd*3);
            clumpSrc->seq = loaded->trans[frame];
            clumpSrc->start = loaded->start/3;
            clumpSrc->end = loaded->end/3;
            clumpToHspRange(clump, seq, tileSize, frame, loaded,
                &rangeList, TRUE, FALSE);
            clump = clump->next;
            }
        }
    slReverse(&rangeList);
    slSort(&rangeList, gfRangeCmpTarget);
    rangeList = gfRangesBundle(rangeList, ffIntronMax/3);

    /* Do detailed alignment of each of the clustered ranges. */
    for (range = rangeList; range != NULL; range = range->next)
        {
        struct dnaSeq *targetSeq = range->tSeq;
        struct trans3 *t3;
        struct ssBundle *stitch;

        AllocVar(stitch);
        stitch->qSeq = seq;
        stitch->genoSeq = targetSeq;
        stitch->ffList = gfRangesToFfItem(range->components, seq);
        stitch->isProt = TRUE;
        t3 = hashMustFindVal(t3Hash, range->tName);
        stitch->t3List = t3;
        ssStitch(stitch, ffCdna, minMatch, ssAliCount);
        getTargetName(range->tName, out->includeTargetFile, targetName);
        saveAlignments(targetName, t3->nibSize, 0,
            stitch, t3Hash, FALSE, strand, ffCdna, minMatch, out);
        ssBundleFree(&stitch);
        }

    /* Cleanup for this strand of database. */
    gfRangeFreeList(&rangeList);
    freeHash(&t3Hash);
    for (ref = t3RefList; ref != NULL; ref = ref->next)
        {
        struct trans3 *doomed = ref->val;
        trans3Free(&doomed);
        }
    slFreeList(&t3RefList);
    freeDnaSeqList(&tSeqList);
    }

/* Final cleanup. */
for (strand = 0; strand <= 1; ++strand)
    {
    for (frame = 0; frame < 3; ++frame)
        gfClumpFreeList(&clumps[strand][frame]);
    }
for (src = ssList; src != NULL; src = src->next)
    freeMem(src->fileName);
slFreeList(&ssList);
lmCleanup(&lm);
}