Esempio n. 1
0
static void addToBigBundleList(struct ssBundle **pOneList, struct hash *bunHash, 
	struct ssBundle **pBigList, struct dnaSeq *query)
/* Fold every bundle on *pOneList into *pBigList so that each target
 * sequence name ends up with exactly one bundle on the big list.
 *   pOneList - list of bundles to merge in; it is freed before returning.
 *   bunHash  - maps target sequence name to its bundle on the big list.
 *   pBigList - accumulated list; new bundles are added at its head.
 *   query    - stored as qSeq in every bundle created here. */
{
struct ssBundle *src = *pOneList;

while (src != NULL)
    {
    char *targetName = src->genoSeq->name;
    struct ssBundle *dest = hashFindVal(bunHash, targetName);

    if (dest == NULL)
        {
        /* First bundle seen for this target: create its representative
         * on the big list and copy the per-target settings across.
         * (Relies on AllocVar handing back a bundle whose ffList is
         * initially NULL, since it is read just below.) */
        AllocVar(dest);
        slAddHead(pBigList, dest);
        hashAdd(bunHash, targetName, dest);
        dest->qSeq = query;
        dest->genoSeq = src->genoSeq;
        dest->isProt = src->isProt;
        dest->avoidFuzzyFindKludge = src->avoidFuzzyFindKludge;
        }

    /* Hand the alignment items over to the big bundle and detach them
     * from the source, so the source no longer refers to them when the
     * one-list is freed below. */
    dest->ffList = slCat(dest->ffList, src->ffList);
    src->ffList = NULL;

    src = src->next;
    }
ssBundleFreeList(pOneList);
}
Esempio n. 2
0
void alignNt(char *nt)
/* Do alignments of draft bacs against one NT.
 *   nt - name of the NT; used in the page heading and to build the
 *        "<faDir>/p<nt>.fa" and "<indexDir>/<nt>.index" file names.
 * For every non-empty line of the index file the first word is taken as a
 * BAC accession (any ".suffix" is stripped), the BAC's contigs are read from
 * "<faDir>/<acc>.fa", and each contig is searched against the NT pattern
 * space twice, with the contig reverse-complemented in place between passes.
 * Path names are built with bounded formatting: if an NT path does not fit
 * the function reports it on stderr and returns; if a BAC path does not fit
 * that BAC is reported and skipped. */
{
char indexFileName[512];
char ntFaName[512];
struct lineFile *indexLf;
int lineSize;
char *line;
char *words[3];
int wordCount;
int written;
struct patSpace *ps;
struct dnaSeq *ntSeq;

printf("<H1>Check Layout of %s</H1>\n", nt);
printf("<PRE>");

/* Build and validate both NT-level paths before acquiring any resource,
 * so the early returns below have nothing to clean up. */
written = snprintf(ntFaName, sizeof(ntFaName), "%s/p%s.fa", faDir, nt);
if (written < 0 || (size_t)written >= sizeof(ntFaName))
    {
    fprintf(stderr, "alignNt: fa file name too long for %s\n", nt);
    return;
    }
written = snprintf(indexFileName, sizeof(indexFileName), "%s/%s.index", indexDir, nt);
if (written < 0 || (size_t)written >= sizeof(indexFileName))
    {
    fprintf(stderr, "alignNt: index file name too long for %s\n", nt);
    return;
    }

ntSeq = faReadAllDna(ntFaName);
/* NOTE(review): ps is never released in this function; confirm whether a
 * matching free routine should be called before returning. */
ps = makePatSpace(&ntSeq, 1, oocFile, 10, 500);
uglyf("Checking out %s and %s\n", indexFileName, ntFaName);
indexLf = lineFileOpen(indexFileName, TRUE);
while (lineFileNext(indexLf, &line, &lineSize))
    {
    wordCount = chopLine(line, words);
    if (wordCount > 0)
	{
	char bacFaName[512];
	struct dnaSeq *contigList, *contig;
	char *bacAcc = words[0];
	/* Drop everything from the last '.' on (e.g. a version suffix). */
	char *s = strrchr(bacAcc, '.');
	if (s != NULL)
	    *s = 0;
	uglyf("%s\n", bacAcc);
	written = snprintf(bacFaName, sizeof(bacFaName), "%s/%s.fa", faDir, bacAcc);
	if (written < 0 || (size_t)written >= sizeof(bacFaName))
	    {
	    /* Accession comes from the index file; skip rather than overflow. */
	    fprintf(stderr, "alignNt: fa file name too long for %s\n", bacAcc);
	    continue;
	    }
	contigList = faReadAllDna(bacFaName);
	for (contig = contigList; contig != NULL; contig = contig->next)
	    {
	    boolean isRc;
	    uglyf(" %s\n", contig->name);
	    /* Two passes: isRc FALSE then TRUE.  The contig is reverse
	     * complemented in place at the end of each pass. */
	    for (isRc = FALSE; isRc <= TRUE; isRc += 1)
		{
		struct ssBundle *bunList, *bun;
		bunList = ssFindBundles(ps, contig, contig->name, ffTight);
		for (bun = bunList; bun != NULL; bun = bun->next)
		    {
		    showBundle(bun, isRc);
		    }
		ssBundleFreeList(&bunList);
		reverseComplement(contig->dna, contig->size);
		}
	    }
	freeDnaSeqList(&contigList);
	}
    }
lineFileClose(&indexLf);
freeDnaSeqList(&ntSeq);
}
Esempio n. 3
0
void gfLongTransTransInMem(struct dnaSeq *query, struct genoFind *gfs[3], 
   struct hash *t3Hash, boolean qIsRc, boolean tIsRc, boolean qIsRna,
   int minScore, struct gfOutput *out)
/* Align a long query in translated space one piece at a time, then merge
 * the per-piece bundles by target sequence, stitch each merged bundle and
 * save its alignments to out.
 * A query of at most maxSize bases is handled as a single piece; longer
 * queries are cut into preferredSize pieces whose starts are
 * preferredSize - overlapSize apart.  The query DNA is temporarily
 * zero-terminated at the end of each piece and restored afterwards. */
{
int maxSize = 1500;
int preferredSize = 1200;	/* PreferredSize - overlapSize might need to be multiple of 3. */
int overlapSize = 270;
enum ffStringency stringency;
struct dnaSeq piece = *query;
struct ssBundle *pieceBundles = NULL, *allBundles = NULL, *bun;
struct hash *bunHash = newHash(8);
int pieceStart = 0;

if (qIsRna)
    stringency = ffCdna;
else
    stringency = ffLoose;

while (pieceStart < query->size)
    {
    int remaining = query->size - pieceStart;
    int pieceSize, nextStart;
    DNA *pieceEnd, savedBase;

    /* Take everything that is left when the whole query is small enough
     * for a single piece, or when what remains fits in one preferred-size
     * piece.  Otherwise take a preferred-size piece and start the next
     * one overlapSize bases before this one ends. */
    if ((pieceStart == 0 && query->size <= maxSize) || remaining <= preferredSize)
        {
        pieceSize = remaining;
        nextStart = query->size;
        }
    else
        {
        pieceSize = preferredSize;
        nextStart = pieceStart + preferredSize - overlapSize;
        }

    piece.dna = query->dna + pieceStart;
    piece.size = pieceSize;

    /* Temporarily terminate the piece inside the query's own buffer. */
    pieceEnd = piece.dna + pieceSize;
    savedBase = *pieceEnd;
    *pieceEnd = 0;

    pieceBundles = gfTransTransFindBundles(gfs, &piece, t3Hash, qIsRc, minScore, qIsRna);
    addToBigBundleList(&pieceBundles, bunHash, &allBundles, query);

    *pieceEnd = savedBase;
    pieceStart = nextStart;
    }

bun = allBundles;
while (bun != NULL)
    {
    ssStitch(bun, ffCdna, minScore, ssAliCount);
    saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0, 
        bun, NULL, qIsRc, tIsRc, stringency, minScore, out);
    bun = bun->next;
    }

hashFree(&bunHash);
ssBundleFreeList(&allBundles);
}
void oneStrand(struct patSpace *ps, struct hash *repeatHash, struct dnaSeq *otherSeq,
    boolean isRc,  enum ffStringency stringency, FILE *out)
/* Search one strand of otherSeq against the pattern space and pass every
 * alignment found to oneAli, together with the repeat tracker registered
 * in repeatHash under the target sequence's name.
 * Every target name returned by the search must be present in repeatHash:
 * the lookup result is dereferenced without a NULL check. */
{
struct ssBundle *bundles = ssFindBundles(ps, otherSeq, otherSeq->name, stringency, avoidSelfSelf);
struct ssBundle *cur = bundles;

while (cur != NULL)
    {
    struct repeatTracker *tracker = hashLookup(repeatHash, cur->genoSeq->name)->val;
    struct ssFfItem *item = cur->ffList;

    while (item != NULL)
        {
        oneAli(item->ff, otherSeq, tracker, isRc, stringency, out);
        item = item->next;
        }
    cur = cur->next;
    }
ssBundleFreeList(&bundles);
}
Esempio n. 5
0
void gfLongDnaInMem(struct dnaSeq *query, struct genoFind *gf, 
   boolean isRc, int minScore, Bits *qMaskBits, 
   struct gfOutput *out, boolean fastMap, boolean band)
/* Align a long DNA query one piece at a time, merge the per-piece bundles
 * by target sequence, stitch each merged bundle and save its alignments
 * to out.
 * A query of at most MAXSINGLEPIECESIZE bases is handled as a single piece;
 * longer queries are cut into preferredSize pieces whose starts are
 * preferredSize - overlapSize apart.  The query DNA is temporarily
 * zero-terminated at the end of each piece and restored afterwards.
 *   band    - when set, bundles come from ffSeedExtInMem.
 *   fastMap - when set (and band is not), bundles come from
 *             fastMapClumpsToBundles; otherwise from gfClumpsToBundles.
 * Small-exon refinement is applied only when neither flag is set. */
{
int maxSize = MAXSINGLEPIECESIZE;
int preferredSize = 4500;
int overlapSize = 250;
int hitCount;
struct dnaSeq piece = *query;
struct lm *lm = lmInit(0);
struct ssBundle *pieceBundles = NULL, *allBundles = NULL, *bun;
struct hash *bunHash = newHash(8);
int pieceStart = 0;

while (pieceStart < query->size)
    {
    int remaining = query->size - pieceStart;
    int pieceSize, nextStart;
    DNA *pieceEnd, savedBase;

    /* Take everything that is left when the whole query is small enough
     * for a single piece, or when what remains fits in one preferred-size
     * piece.  Otherwise take a preferred-size piece and start the next
     * one overlapSize bases before this one ends. */
    if ((pieceStart == 0 && query->size <= maxSize) || remaining <= preferredSize)
        {
        pieceSize = remaining;
        nextStart = query->size;
        }
    else
        {
        pieceSize = preferredSize;
        nextStart = pieceStart + preferredSize - overlapSize;
        }

    piece.dna = query->dna + pieceStart;
    piece.size = pieceSize;

    /* Temporarily terminate the piece inside the query's own buffer. */
    pieceEnd = piece.dna + pieceSize;
    savedBase = *pieceEnd;
    *pieceEnd = 0;

    if (band)
        {
        pieceBundles = ffSeedExtInMem(gf, &piece, qMaskBits, pieceStart, lm, minScore, isRc);
        }
    else
        {
        struct gfClump *clumps = gfFindClumpsWithQmask(gf, &piece, qMaskBits, pieceStart, lm, &hitCount);

        if (fastMap)
            {
            pieceBundles = fastMapClumpsToBundles(gf, clumps, &piece);
            }
        else
            {
            struct gfRange *ranges = NULL;

            pieceBundles = gfClumpsToBundles(clumps, isRc, &piece, minScore, &ranges);
            gfRangeFreeList(&ranges);
            }
        gfClumpFreeList(&clumps);
        }

    addToBigBundleList(&pieceBundles, bunHash, &allBundles, query);

    *pieceEnd = savedBase;
    pieceStart = nextStart;
    }

#ifdef DEBUG
dumpBunList(allBundles);
#endif /* DEBUG */

bun = allBundles;
while (bun != NULL)
    {
    ssStitch(bun, ffCdna, minScore, ssAliCount);
    if (!(fastMap || band))
        refineSmallExonsInBundle(bun);
    saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0, 
        bun, NULL, isRc, FALSE, ffCdna, minScore, out);
    bun = bun->next;
    }

ssBundleFreeList(&allBundles);
freeHash(&bunHash);
lmCleanup(&lm);
}