struct ssBundle *ffSeedExtInMem(struct genoFind *gf, struct dnaSeq *qSeq, Bits *qMaskBits,
	int qOffset, struct lm *lm, int minScore, boolean isRc)
/* Do seed and extend type alignment of qSeq against the index in gf.
 * Clumps found by gfFindClumpsWithQmask are turned into ranges with
 * clumpToExactRange, sorted with gfRangeCmpTarget and grouped by
 * gfRangesBundle using ffIntronMax.  Each resulting range becomes one
 * ssBundle that is stitched and then refined.  Returns the list of
 * bundles built this way (NULL if there were no ranges).
 * Note: minScore is not referenced in this body; the ssStitch call
 * below uses the fixed arguments 16 and 10. */
{
struct ssBundle *bundles = NULL, *bundle;
struct gfClump *clumps, *curClump;
struct gfRange *ranges = NULL, *curRange;
struct dnaSeq *target;
int hitCount;

/* Gather ranges from every clump found for this query. */
clumps = gfFindClumpsWithQmask(gf, qSeq, qMaskBits, qOffset, lm, &hitCount);
curClump = clumps;
while (curClump != NULL)
    {
    clumpToExactRange(curClump, qSeq, gf->tileSize, 0, NULL, &ranges);
    curClump = curClump->next;
    }

slSort(&ranges, gfRangeCmpTarget);
ranges = gfRangesBundle(ranges, ffIntronMax);

/* Build, stitch and refine one bundle per bundled range. */
curRange = ranges;
while (curRange != NULL)
    {
    /* Shift the range's query coordinates by qOffset. */
    curRange->qStart += qOffset;
    curRange->qEnd += qOffset;
    target = curRange->tSeq;

    AllocVar(bundle);
    bundle->qSeq = qSeq;
    bundle->genoSeq = target;
    bundle->ffList = gfRangesToFfItem(curRange->components, qSeq);
    bundle->isProt = FALSE;
    bundle->avoidFuzzyFindKludge = TRUE;

    ssStitch(bundle, ffCdna, 16, 10);
    refineBundle(gf, qSeq, qMaskBits, qOffset, target, lm, bundle, isRc);
    slAddHead(&bundles, bundle);

    curRange = curRange->next;
    }

/* The intermediate range and clump lists are released before returning. */
gfRangeFreeList(&ranges);
gfClumpFreeList(&clumps);
return bundles;
}
void gfLongDnaInMem(struct dnaSeq *query, struct genoFind *gf, boolean isRc, int minScore,
	Bits *qMaskBits, struct gfOutput *out, boolean fastMap, boolean band)
/* Chop up query into pieces, align each, and stitch back
 * together again.
 *   band    - when set, each piece is aligned with ffSeedExtInMem.
 *   fastMap - when band is not set, chooses fastMapClumpsToBundles
 *             over gfClumpsToBundles for turning clumps into bundles.
 *   out     - handed to saveAlignments for every final bundle.
 * While a piece is being aligned, the byte just past its end in
 * query->dna is overwritten with 0 and put back afterwards.  For the
 * last piece that byte is query->dna[query->size], so it has to be
 * writable. */
{
int maxSize = MAXSINGLEPIECESIZE;
int preferredSize = 4500;
int overlapSize = 250;
int hitCount;
int pieceStart, pieceSize, nextStart;
struct dnaSeq piece = *query;
struct lm *lm = lmInit(0);
DNA savedBase, *pieceEnd;
struct ssBundle *pieceBundles = NULL, *allBundles = NULL, *bundle;
struct hash *bunHash = newHash(8);

pieceStart = 0;
while (pieceStart < query->size)
    {
    /* Decide how big this piece is.  A query of maxSize or less is
     * done in one go.  Otherwise take preferredSize bases at a time,
     * stepping so that neighbouring pieces share overlapSize bases;
     * the final piece simply runs to the end of the query. */
    if ((pieceStart == 0 && query->size <= maxSize)
	|| preferredSize + pieceStart >= query->size)
	{
	pieceSize = query->size - pieceStart;
	nextStart = query->size;
	}
    else
	{
	pieceSize = preferredSize;
	nextStart = pieceStart + preferredSize - overlapSize;
	}

    /* Point the sub-query at this piece and zero-terminate it in place. */
    piece.dna = query->dna + pieceStart;
    piece.size = pieceSize;
    pieceEnd = piece.dna + pieceSize;
    savedBase = *pieceEnd;
    *pieceEnd = 0;

    if (band)
	{
	pieceBundles = ffSeedExtInMem(gf, &piece, qMaskBits, pieceStart, lm, minScore, isRc);
	}
    else
	{
	struct gfClump *clumps = gfFindClumpsWithQmask(gf, &piece, qMaskBits,
		pieceStart, lm, &hitCount);
	if (fastMap)
	    {
	    pieceBundles = fastMapClumpsToBundles(gf, clumps, &piece);
	    }
	else
	    {
	    struct gfRange *ranges = NULL;
	    pieceBundles = gfClumpsToBundles(clumps, isRc, &piece, minScore, &ranges);
	    gfRangeFreeList(&ranges);
	    }
	gfClumpFreeList(&clumps);
	}

    addToBigBundleList(&pieceBundles, bunHash, &allBundles, query);

    /* Put back the base that was overwritten by the terminator. */
    *pieceEnd = savedBase;
    pieceStart = nextStart;
    }

#ifdef DEBUG
dumpBunList(allBundles);
#endif /* DEBUG */

/* Stitch each accumulated bundle and write out its alignments. */
for (bundle = allBundles; bundle != NULL; bundle = bundle->next)
    {
    ssStitch(bundle, ffCdna, minScore, ssAliCount);
    if (!(fastMap || band))
	refineSmallExonsInBundle(bundle);
    saveAlignments(bundle->genoSeq->name, bundle->genoSeq->size, 0,
	bundle, NULL, isRc, FALSE, ffCdna, minScore, out);
    }

ssBundleFreeList(&allBundles);
freeHash(&bunHash);
lmCleanup(&lm);
}