static void addToBigBundleList(struct ssBundle **pOneList, struct hash *bunHash,
	struct ssBundle **pBigList, struct dnaSeq *query)
/* Fold the bundles on *pOneList into *pBigList so that each target
 * sequence name ends up with a single bundle.  bunHash maps target name
 * to the consolidated bundle already on the big list.  The one-list is
 * freed via ssBundleFreeList() before returning. */
{
struct ssBundle *src = *pOneList;

while (src != NULL)
    {
    char *targetName = src->genoSeq->name;
    struct ssBundle *dest = hashFindVal(bunHash, targetName);

    if (dest == NULL)
	{
	/* First time this target is seen: create its consolidated bundle,
	 * copying the per-target settings from the source bundle. */
	AllocVar(dest);
	dest->qSeq = query;
	dest->genoSeq = src->genoSeq;
	dest->isProt = src->isProt;
	dest->avoidFuzzyFindKludge = src->avoidFuzzyFindKludge;
	slAddHead(pBigList, dest);
	hashAdd(bunHash, targetName, dest);
	}

    /* Move the alignments over, then detach them from the source so they
     * are no longer reachable from the bundle that is about to be freed. */
    dest->ffList = slCat(dest->ffList, src->ffList);
    src->ffList = NULL;

    src = src->next;
    }
ssBundleFreeList(pOneList);
}
void alignNt(char *nt) /* Do alignments of draft bacs against one NT. */ { char indexFileName[512]; char ntFaName[512]; struct lineFile *indexLf; int lineSize; char *line; char *words[3]; int wordCount; struct patSpace *ps; struct dnaSeq *ntSeq; printf("<H1>Check Layout of %s</H1>\n", nt); printf("<PRE>"); sprintf(ntFaName, "%s/p%s.fa", faDir, nt); ntSeq = faReadAllDna(ntFaName); ps = makePatSpace(&ntSeq, 1, oocFile, 10, 500); sprintf(indexFileName, "%s/%s.index", indexDir, nt); uglyf("Checking out %s and %s\n", indexFileName, ntFaName); indexLf = lineFileOpen(indexFileName, TRUE); while (lineFileNext(indexLf, &line, &lineSize)) { wordCount = chopLine(line, words); if (wordCount > 0) { char bacFaName[512]; struct dnaSeq *contigList, *contig; char *bacAcc = words[0]; char *s = strrchr(bacAcc, '.'); if (s != NULL) *s = 0; uglyf("%s\n", bacAcc); sprintf(bacFaName, "%s/%s.fa", faDir, bacAcc); contigList = faReadAllDna(bacFaName); for (contig = contigList; contig != NULL; contig = contig->next) { boolean isRc; uglyf(" %s\n", contig->name); for (isRc = FALSE; isRc <= TRUE; isRc += 1) { struct ssBundle *bunList, *bun; bunList = ssFindBundles(ps, contig, contig->name, ffTight); for (bun = bunList; bun != NULL; bun = bun->next) { showBundle(bun, isRc); } ssBundleFreeList(&bunList); reverseComplement(contig->dna, contig->size); } } freeDnaSeqList(&contigList); } } lineFileClose(&indexLf); freeDnaSeqList(&ntSeq); }
void gfLongTransTransInMem(struct dnaSeq *query, struct genoFind *gfs[3],
	struct hash *t3Hash, boolean qIsRc, boolean tIsRc, boolean qIsRna,
	int minScore, struct gfOutput *out)
/* Chop up query into pieces, align each in translated space, and stitch
 * back together again as nucleotides.
 *
 * A query no longer than maxSize is handled as one piece.  Otherwise it is
 * cut into preferredSize pieces, each starting preferredSize - overlapSize
 * after the previous one; the final piece takes whatever is left.  The
 * bundles from every piece are merged per target, stitched, and handed to
 * saveAlignments(). */
{
enum ffStringency stringency = (qIsRna ? ffCdna : ffLoose);
const int maxSize = 1500;
const int preferredSize = 1200;
/* PreferredSize - overlapSize might need to be multiple of 3. */
const int overlapSize = 270;
struct dnaSeq subQuery = *query;
struct ssBundle *bigBunList = NULL, *bun;
struct hash *bunHash = newHash(8);
int pieceStart = 0;

while (pieceStart < query->size)
    {
    int pieceSize, nextStart;
    DNA *pieceEnd, savedBase;
    struct ssBundle *pieceBundles;

    /* Either the whole (short) query is one piece, or this piece runs up
     * to the end of the query: in both cases take everything remaining. */
    if ((pieceStart == 0 && query->size <= maxSize)
	|| pieceStart + preferredSize >= query->size)
	{
	pieceSize = query->size - pieceStart;
	nextStart = query->size;
	}
    else
	{
	pieceSize = preferredSize;
	nextStart = pieceStart + preferredSize - overlapSize;
	}

    subQuery.dna = query->dna + pieceStart;
    subQuery.size = pieceSize;

    /* Temporarily zero-terminate the piece inside the query's own buffer;
     * the overwritten base is put back once the piece has been aligned. */
    pieceEnd = &subQuery.dna[pieceSize];
    savedBase = *pieceEnd;
    *pieceEnd = 0;

    pieceBundles = gfTransTransFindBundles(gfs, &subQuery, t3Hash, qIsRc, minScore, qIsRna);
    addToBigBundleList(&pieceBundles, bunHash, &bigBunList, query);

    *pieceEnd = savedBase;
    pieceStart = nextStart;
    }

for (bun = bigBunList; bun != NULL; bun = bun->next)
    {
    ssStitch(bun, ffCdna, minScore, ssAliCount);
    saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0,
	bun, NULL, qIsRc, tIsRc, stringency, minScore, out);
    }

hashFree(&bunHash);
ssBundleFreeList(&bigBunList);
}
void oneStrand(struct patSpace *ps, struct hash *repeatHash, struct dnaSeq *otherSeq,
	boolean isRc, enum ffStringency stringency, FILE *out)
/* Search one strand of other sequence.
 *
 * Finds bundles for otherSeq in ps, and for each bundle passes every
 * alignment on its ffList to oneAli() together with the repeatTracker
 * stored in repeatHash under the bundle's target sequence name.
 * NOTE: the hashLookup() result is dereferenced without a check, so this
 * assumes every target name is present in repeatHash. */
{
struct ssBundle *hits = ssFindBundles(ps, otherSeq, otherSeq->name, stringency, avoidSelfSelf);
struct ssBundle *hit = hits;

while (hit != NULL)
    {
    struct repeatTracker *tracker = hashLookup(repeatHash, hit->genoSeq->name)->val;
    struct ssFfItem *item;

    for (item = hit->ffList; item != NULL; item = item->next)
	oneAli(item->ff, otherSeq, tracker, isRc, stringency, out);

    hit = hit->next;
    }
ssBundleFreeList(&hits);
}
void gfLongDnaInMem(struct dnaSeq *query, struct genoFind *gf,
	boolean isRc, int minScore, Bits *qMaskBits,
	struct gfOutput *out, boolean fastMap, boolean band)
/* Chop up query into pieces, align each, and stitch back together again.
 *
 * A query no longer than maxSize is handled as one piece.  Otherwise it is
 * cut into preferredSize pieces, each starting preferredSize - overlapSize
 * after the previous one; the final piece takes whatever is left.  Each
 * piece is aligned by one of three routes chosen by the band and fastMap
 * flags, the resulting bundles are merged per target, stitched, and handed
 * to saveAlignments(). */
{
int hitCount;
const int maxSize = MAXSINGLEPIECESIZE;
const int preferredSize = 4500;
const int overlapSize = 250;
struct dnaSeq subQuery = *query;
struct lm *lm = lmInit(0);
struct ssBundle *bigBunList = NULL, *bun;
struct hash *bunHash = newHash(8);
int pieceStart = 0;

while (pieceStart < query->size)
    {
    int pieceSize, nextStart;
    DNA *pieceEnd, savedBase;
    struct ssBundle *pieceBundles = NULL;

    /* Either the whole (short) query is one piece, or this piece runs up
     * to the end of the query: in both cases take everything remaining. */
    if ((pieceStart == 0 && query->size <= maxSize)
	|| pieceStart + preferredSize >= query->size)
	{
	pieceSize = query->size - pieceStart;
	nextStart = query->size;
	}
    else
	{
	pieceSize = preferredSize;
	nextStart = pieceStart + preferredSize - overlapSize;
	}

    subQuery.dna = query->dna + pieceStart;
    subQuery.size = pieceSize;

    /* Temporarily zero-terminate the piece inside the query's own buffer;
     * the overwritten base is put back once the piece has been aligned. */
    pieceEnd = &subQuery.dna[pieceSize];
    savedBase = *pieceEnd;
    *pieceEnd = 0;

    if (band)
	{
	pieceBundles = ffSeedExtInMem(gf, &subQuery, qMaskBits, pieceStart, lm, minScore, isRc);
	}
    else
	{
	struct gfClump *clumpList = gfFindClumpsWithQmask(gf, &subQuery, qMaskBits,
		pieceStart, lm, &hitCount);
	if (fastMap)
	    {
	    pieceBundles = fastMapClumpsToBundles(gf, clumpList, &subQuery);
	    }
	else
	    {
	    struct gfRange *rangeList = NULL;
	    pieceBundles = gfClumpsToBundles(clumpList, isRc, &subQuery, minScore, &rangeList);
	    gfRangeFreeList(&rangeList);
	    }
	gfClumpFreeList(&clumpList);
	}

    addToBigBundleList(&pieceBundles, bunHash, &bigBunList, query);

    *pieceEnd = savedBase;
    pieceStart = nextStart;
    }

#ifdef DEBUG
dumpBunList(bigBunList);
#endif /* DEBUG */

for (bun = bigBunList; bun != NULL; bun = bun->next)
    {
    ssStitch(bun, ffCdna, minScore, ssAliCount);
    /* Small-exon refinement is only applied on the plain clump route. */
    if (!(fastMap || band))
	refineSmallExonsInBundle(bun);
    saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0,
	bun, NULL, isRc, FALSE, ffCdna, minScore, out);
    }

ssBundleFreeList(&bigBunList);
freeHash(&bunHash);
lmCleanup(&lm);
}