static struct ssBundle *fastMapClumpsToBundles(struct genoFind *gf, struct gfClump *clumpList, bioSeq *qSeq) /* Convert gfClumps ffAlis. */ { struct gfClump *clump; struct gfRange *rangeList = NULL, *range; bioSeq *targetSeq; struct ssBundle *bunList = NULL, *bun; for (clump = clumpList; clump != NULL; clump = clump->next) clumpToHspRange(clump, qSeq, gf->tileSize, 0, NULL, &rangeList, FALSE, TRUE); slReverse(&rangeList); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, 256); for (range = rangeList; range != NULL; range = range->next) { targetSeq = range->tSeq; AllocVar(bun); bun->qSeq = qSeq; bun->genoSeq = targetSeq; bun->ffList = gfRangesToFfItem(range->components, qSeq); bun->isProt = FALSE; slAddHead(&bunList, bun); } gfRangeFreeList(&rangeList); return bunList; }
struct ssBundle *ffSeedExtInMem(struct genoFind *gf, struct dnaSeq *qSeq, Bits *qMaskBits, int qOffset, struct lm *lm, int minScore, boolean isRc) /* Do seed and extend type alignment */ { struct ssBundle *bunList = NULL, *bun; int hitCount; struct gfClump *clumpList, *clump; struct gfRange *rangeList = NULL, *range; struct dnaSeq *tSeq; clumpList = gfFindClumpsWithQmask(gf, qSeq, qMaskBits, qOffset, lm, &hitCount); for (clump = clumpList; clump != NULL; clump = clump->next) clumpToExactRange(clump, qSeq, gf->tileSize, 0, NULL, &rangeList); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, ffIntronMax); for (range = rangeList; range != NULL; range = range->next) { range->qStart += qOffset; range->qEnd += qOffset; tSeq = range->tSeq; AllocVar(bun); bun->qSeq = qSeq; bun->genoSeq = tSeq; bun->ffList = gfRangesToFfItem(range->components, qSeq); bun->isProt = FALSE; bun->avoidFuzzyFindKludge = TRUE; ssStitch(bun, ffCdna, 16, 10); refineBundle(gf, qSeq, qMaskBits, qOffset, tSeq, lm, bun, isRc); slAddHead(&bunList, bun); } gfRangeFreeList(&rangeList); gfClumpFreeList(&clumpList); return bunList; }
static struct ssBundle *gfClumpsToBundles(struct gfClump *clumpList, boolean isRc, struct dnaSeq *seq, int minScore, struct gfRange **retRangeList) /* Convert gfClumps to an actual alignments (ssBundles) */ { struct ssBundle *bun, *bunList = NULL; struct gfRange *rangeList = NULL, *range; struct dnaSeq *targetSeq; rangeList = seqClumpToRangeList(clumpList, 0); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, 2000); for (range = rangeList; range != NULL; range = range->next) { targetSeq = range->tSeq; gfiExpandRange(range, seq->size, targetSeq->size, FALSE, isRc, usualExpansion); range->tStart = 0; range->tEnd = targetSeq->size; AllocVar(bun); bun->qSeq = seq; bun->genoSeq = targetSeq; alignComponents(range, bun, ffCdna); ssStitch(bun, ffCdna, minScore, ssAliCount); slAddHead(&bunList, bun); } slReverse(&bunList); *retRangeList = rangeList; return bunList; }
void gfAlignStrand(int *pConn, char *tSeqDir, struct dnaSeq *seq, boolean isRc, int minMatch, struct hash *tFileCache, struct gfOutput *out) /* Search genome on server with one strand of other sequence to find homology. * Then load homologous bits of genome locally and do detailed alignment. * Call 'outFunction' with each alignment that is found. */ { struct ssBundle *bun; struct gfRange *rangeList = NULL, *range; struct dnaSeq *targetSeq; char targetName[PATH_LEN]; rangeList = gfQuerySeq(*pConn, seq); close(*pConn); *pConn = -1; slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, ffIntronMax); for (range = rangeList; range != NULL; range = range->next) { getTargetName(range->tName, out->includeTargetFile, targetName); targetSeq = gfiExpandAndLoadCached(range, tFileCache, tSeqDir, seq->size, &range->tTotalSize, FALSE, FALSE, usualExpansion); AllocVar(bun); bun->qSeq = seq; bun->genoSeq = targetSeq; alignComponents(range, bun, ffCdna); ssStitch(bun, ffCdna, minMatch, ssAliCount); saveAlignments(targetName, range->tTotalSize, range->tStart, bun, NULL, isRc, FALSE, ffCdna, minMatch, out); ssBundleFree(&bun); freeDnaSeq(&targetSeq); } gfRangeFreeList(&rangeList); }
static struct ssBundle *gfTransTransFindBundles(struct genoFind *gfs[3], struct dnaSeq *qSeq, struct hash *t3Hash, boolean isRc, int minMatch, boolean isRna) /* Look for alignment to three translations of qSeq in three translated reading frames. * Save alignment via outFunction/outData. */ { struct trans3 *qTrans = trans3New(qSeq); int qFrame, tFrame; struct gfClump *clumps[3][3], *clump; struct gfRange *rangeList = NULL, *range; int tileSize = gfs[0]->tileSize; bioSeq *targetSeq; struct ssBundle *bun, *bunList = NULL; int hitCount; struct lm *lm = lmInit(0); enum ffStringency stringency = (isRna ? ffCdna : ffLoose); gfTransTransFindClumps(gfs, qTrans->trans, clumps, lm, &hitCount); for (qFrame = 0; qFrame<3; ++qFrame) { for (tFrame=0; tFrame<3; ++tFrame) { for (clump = clumps[qFrame][tFrame]; clump != NULL; clump = clump->next) { struct gfRange *rangeSet = NULL; clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame, NULL, &rangeSet, TRUE, FALSE); untranslateRangeList(rangeSet, qFrame, tFrame, t3Hash, NULL, 0); rangeList = slCat(rangeSet, rangeList); } } } slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, 2000); for (range = rangeList; range != NULL; range = range->next) { targetSeq = range->tSeq; AllocVar(bun); bun->qSeq = qSeq; bun->genoSeq = targetSeq; bun->ffList = gfRangesToFfItem(range->components, qSeq); ssStitch(bun, stringency, minMatch, ssAliCount); slAddHead(&bunList, bun); } for (qFrame = 0; qFrame<3; ++qFrame) for (tFrame=0; tFrame<3; ++tFrame) gfClumpFreeList(&clumps[qFrame][tFrame]); gfRangeFreeList(&rangeList); trans3Free(&qTrans); lmCleanup(&lm); slReverse(&bunList); return bunList; }
void gfFindAlignAaTrans(struct genoFind *gfs[3], aaSeq *qSeq, struct hash *t3Hash, boolean tIsRc, int minMatch, struct gfOutput *out) /* Look for qSeq alignment in three translated reading frames. Save alignment * via outFunction/outData. */ { struct gfClump *clumps[3]; int frame; struct gfClump *clump; struct gfRange *rangeList = NULL, *range; aaSeq *targetSeq; struct ssBundle *bun; int tileSize = gfs[0]->tileSize; struct trans3 *t3; int hitCount; struct lm *lm = lmInit(0); gfTransFindClumps(gfs, qSeq, clumps, lm, &hitCount); for (frame=0; frame<3; ++frame) { for (clump = clumps[frame]; clump != NULL; clump = clump->next) { clumpToHspRange(clump, qSeq, tileSize, frame, NULL, &rangeList, TRUE, FALSE); } } slReverse(&rangeList); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, ffIntronMax/3); for (range = rangeList; range != NULL; range = range->next) { targetSeq = range->tSeq; t3 = hashMustFindVal(t3Hash, targetSeq->name); AllocVar(bun); bun->qSeq = qSeq; bun->genoSeq = targetSeq; bun->ffList = gfRangesToFfItem(range->components, qSeq); bun->isProt = TRUE; bun->t3List = t3; ssStitch(bun, ffCdna, minMatch, ssAliCount); saveAlignments(targetSeq->name, t3->seq->size, 0, bun, t3Hash, FALSE, tIsRc, ffCdna, minMatch, out); ssBundleFree(&bun); } gfRangeFreeList(&rangeList); for (frame=0; frame<3; ++frame) gfClumpFreeList(&clumps[frame]); lmCleanup(&lm); }
static void gfAlignSomeClumps(struct genoFind *gf, struct gfClump *clumpList, bioSeq *seq, boolean isRc, int minMatch, struct gfOutput *out, boolean isProt, enum ffStringency stringency) /* Convert gfClumps to an actual alignment that gets saved via * outFunction/outData. */ { struct gfClump *clump; struct gfRange *rangeList = NULL, *range; bioSeq *targetSeq; struct ssBundle *bun; int intronMax = ffIntronMax; if (isProt) intronMax /= 3; for (clump = clumpList; clump != NULL; clump = clump->next) { clumpToHspRange(clump, seq, gf->tileSize, 0, NULL, &rangeList, isProt, FALSE); } slReverse(&rangeList); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, intronMax); for (range = rangeList; range != NULL; range = range->next) { targetSeq = range->tSeq; AllocVar(bun); bun->qSeq = seq; bun->genoSeq = targetSeq; bun->ffList = gfRangesToFfItem(range->components, seq); bun->isProt = isProt; ssStitch(bun, stringency, minMatch, ssAliCount); saveAlignments(targetSeq->name, targetSeq->size, 0, bun, NULL, isRc, FALSE, stringency, minMatch, out); ssBundleFree(&bun); } gfRangeFreeList(&rangeList); }
void gfAlignTransTrans(int *pConn, char *tSeqDir, struct dnaSeq *qSeq, boolean qIsRc, int minMatch, struct hash *tFileCache, struct gfOutput *out, boolean isRna) /* Search indexed translated genome on server with an dna sequence. Translate * this sequence in three frames. Load homologous bits of genome locally * and do detailed alignment. Call 'outFunction' with each alignment * that is found. */ { struct gfClump *clumps[2][3][3], *clump; char targetName[PATH_LEN]; int qFrame, tFrame, tIsRc; struct gfSeqSource *ssList = NULL, *ss; struct lm *lm = lmInit(0); int tileSize; struct gfRange *rangeList = NULL, *rl, *range; struct trans3 *qTrans = trans3New(qSeq), *t3; struct slRef *t3RefList = NULL, *t3Ref; struct hash *t3Hash = NULL; struct dnaSeq *tSeqList = NULL; enum ffStringency stringency = (isRna ? ffCdna : ffLoose); /* Query server for clumps. */ gfQuerySeqTransTrans(*pConn, qSeq, clumps, lm, &ssList, &tileSize); close(*pConn); *pConn = -1; for (tIsRc=0; tIsRc <= 1; ++tIsRc) { /* Figure out which ranges need to be loaded and load them. */ for (qFrame = 0; qFrame < 3; ++qFrame) { for (tFrame = 0; tFrame < 3; ++tFrame) { rl = seqClumpToRangeList(clumps[tIsRc][qFrame][tFrame], tFrame); rangeList = slCat(rangeList, rl); } } rangeCoorTimes3(rangeList); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, ffIntronMax); loadHashT3Ranges(rangeList, tSeqDir, tFileCache, qSeq->size/3, tIsRc, &t3Hash, &tSeqList, &t3RefList); /* The old range list was not very precise - it was just to get * the DNA loaded. */ gfRangeFreeList(&rangeList); /* Patch up clump list and associated sequence source to refer * to bits of genome loaded into memory. Create new range list * by extending hits in clumps. */ for (qFrame = 0; qFrame < 3; ++qFrame) { for (tFrame = 0; tFrame < 3; ++tFrame) { for (clump = clumps[tIsRc][qFrame][tFrame]; clump != NULL; clump = clump->next) { struct gfSeqSource *ss = clump->target; struct gfRange *rangeSet = NULL; t3 = trans3Find(t3Hash, clumpTargetName(clump), clump->tStart*3, clump->tEnd*3); ss->seq = t3->trans[tFrame]; ss->start = t3->start/3; ss->end = t3->end/3; clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame, t3, &rangeSet, TRUE, FALSE); untranslateRangeList(rangeSet, qFrame, tFrame, NULL, t3, t3->start); rangeList = slCat(rangeSet, rangeList); } } } slReverse(&rangeList); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, ffIntronMax); for (range = rangeList; range != NULL; range = range->next) { struct dnaSeq *targetSeq = range->tSeq; struct ssBundle *bun; AllocVar(bun); bun->qSeq = qSeq; bun->genoSeq = targetSeq; bun->ffList = gfRangesToFfItem(range->components, qSeq); ssStitch(bun, stringency, minMatch, ssAliCount); getTargetName(range->tName, out->includeTargetFile, targetName); t3 = range->t3; saveAlignments(targetName, t3->nibSize, t3->start, bun, NULL, qIsRc, tIsRc, stringency, minMatch, out); ssBundleFree(&bun); } /* Cleanup for this strand of database. */ gfRangeFreeList(&rangeList); freeHash(&t3Hash); for (t3Ref = t3RefList; t3Ref != NULL; t3Ref = t3Ref->next) { struct trans3 *t3 = t3Ref->val; trans3Free(&t3); } slFreeList(&t3RefList); freeDnaSeqList(&tSeqList); } trans3Free(&qTrans); for (ss = ssList; ss != NULL; ss = ss->next) freeMem(ss->fileName); slFreeList(&ssList); lmCleanup(&lm); }
void gfAlignTrans(int *pConn, char *tSeqDir, aaSeq *seq, int minMatch, struct hash *tFileCache, struct gfOutput *out) /* Search indexed translated genome on server with an amino acid sequence. * Then load homologous bits of genome locally and do detailed alignment. * Call 'outFunction' with each alignment that is found. */ { struct ssBundle *bun; struct gfClump *clumps[2][3], *clump; struct gfRange *rangeList = NULL, *range, *rl; struct dnaSeq *targetSeq, *tSeqList = NULL; char targetName[PATH_LEN]; int tileSize; int frame, isRc = 0; struct hash *t3Hash = NULL; struct slRef *t3RefList = NULL, *ref; struct gfSeqSource *ssList = NULL, *ss; struct trans3 *t3; struct lm *lm = lmInit(0); /* Get clumps from server. */ gfQuerySeqTrans(*pConn, seq, clumps, lm, &ssList, &tileSize); close(*pConn); *pConn = -1; for (isRc = 0; isRc <= 1; ++isRc) { /* Figure out which parts of sequence we need to load. */ for (frame = 0; frame < 3; ++frame) { rl = seqClumpToRangeList(clumps[isRc][frame], frame); rangeList = slCat(rangeList, rl); } /* Convert from amino acid to nucleotide coordinates. */ rangeCoorTimes3(rangeList); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, ffIntronMax); loadHashT3Ranges(rangeList, tSeqDir, tFileCache, seq->size, isRc, &t3Hash, &tSeqList, &t3RefList); /* The old range list was not very precise - it was just to get * the DNA loaded. */ gfRangeFreeList(&rangeList); /* Patch up clump list and associated sequence source to refer * to bits of genome loaded into memory. Create new range list * by extending hits in clumps. */ for (frame = 0; frame < 3; ++frame) { for (clump = clumps[isRc][frame]; clump != NULL; clump = clump->next) { struct gfSeqSource *ss = clump->target; t3 = trans3Find(t3Hash, clumpTargetName(clump), clump->tStart*3, clump->tEnd*3); ss->seq = t3->trans[frame]; ss->start = t3->start/3; ss->end = t3->end/3; clumpToHspRange(clump, seq, tileSize, frame, t3, &rangeList, TRUE, FALSE); } } slReverse(&rangeList); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, ffIntronMax/3); /* Do detailed alignment of each of the clustered ranges. */ for (range = rangeList; range != NULL; range = range->next) { targetSeq = range->tSeq; AllocVar(bun); bun->qSeq = seq; bun->genoSeq = targetSeq; bun->ffList = gfRangesToFfItem(range->components, seq); bun->isProt = TRUE; t3 = hashMustFindVal(t3Hash, range->tName); bun->t3List = t3; ssStitch(bun, ffCdna, minMatch, ssAliCount); getTargetName(range->tName, out->includeTargetFile, targetName); saveAlignments(targetName, t3->nibSize, 0, bun, t3Hash, FALSE, isRc, ffCdna, minMatch, out); ssBundleFree(&bun); } /* Cleanup for this strand of database. */ gfRangeFreeList(&rangeList); freeHash(&t3Hash); for (ref = t3RefList; ref != NULL; ref = ref->next) { struct trans3 *t3 = ref->val; trans3Free(&t3); } slFreeList(&t3RefList); freeDnaSeqList(&tSeqList); } /* Final cleanup. */ for (isRc=0; isRc<=1; ++isRc) for (frame=0; frame<3; ++frame) gfClumpFreeList(&clumps[isRc][frame]); for (ss = ssList; ss != NULL; ss = ss->next) freeMem(ss->fileName); slFreeList(&ssList); lmCleanup(&lm); }