struct trans3 *seqListToTrans3List(struct dnaSeq *seqList, aaSeq *transLists[3], struct hash **retHash)
/* Make one trans3 per sequence in seqList.
 *   seqList    - input sequences, walked through their next pointers.
 *   transLists - one list head per reading frame; trans[frame] of every new
 *                trans3 is linked onto transLists[frame].
 *                NOTE(review): the heads are read here but never reset, so the
 *                caller presumably must initialize them (e.g. to NULL) - confirm.
 *   retHash    - receives a newly created hash keyed by trans3 name with the
 *                trans3 as value (added with hashAddUnique).
 * Returns the list of new trans3.  All lists are built by adding at the head
 * and are reversed once at the end. */
{
struct hash *nameHash = newHash(0);
struct trans3 *trans3List = NULL;
struct dnaSeq *dna = seqList;
int f;

while (dna != NULL)
    {
    struct trans3 *cur = trans3New(dna);

    hashAddUnique(nameHash, cur->name, cur);
    slAddHead(&trans3List, cur);
    /* Thread each frame's translation onto the matching per-frame list. */
    for (f = 0; f < 3; ++f)
        {
        slAddHead(&transLists[f], cur->trans[f]);
        }
    dna = dna->next;
    }

/* Undo the head-insertion order on every list that was built above. */
slReverse(&trans3List);
for (f = 0; f < 3; ++f)
    {
    slReverse(&transLists[f]);
    }

*retHash = nameHash;
return trans3List;
}
/* Example no. 2 */
/* 0 */
static struct ssBundle *gfTransTransFindBundles(struct genoFind *gfs[3], struct dnaSeq *qSeq, 
	struct hash *t3Hash, boolean isRc, int minMatch, boolean isRna)
/* Translate qSeq in three frames, look for clumps against the three indexes
 * in gfs, and turn them into stitched alignment bundles.
 *   gfs      - one index per target frame; tile size is taken from gfs[0].
 *   qSeq     - query DNA; stored as qSeq in every returned bundle.
 *   t3Hash   - handed to untranslateRangeList for each clump's range set.
 *   isRc     - not referenced in this function.
 *   minMatch - passed through to ssStitch.
 *   isRna    - selects ffCdna stringency when true, ffLoose otherwise.
 * Returns a list with one ssBundle per bundled range.  Nothing in this
 * function frees the returned bundles. */
{
struct trans3 *queryT3 = trans3New(qSeq);
struct lm *localMem = lmInit(0);
struct gfClump *clumpGrid[3][3];
struct gfRange *ranges = NULL, *bundled;
struct ssBundle *bundles = NULL;
enum ffStringency stringency;
int tileSize = gfs[0]->tileSize;
int hitCount;
int cell;

if (isRna)
    stringency = ffCdna;
else
    stringency = ffLoose;

gfTransTransFindClumps(gfs, queryT3->trans, clumpGrid, localMem, &hitCount);

/* Visit the 3x3 (query frame, target frame) grid in row-major order and
 * gather the ranges of every clump into a single list. */
for (cell = 0; cell < 9; ++cell)
    {
    int qFrame = cell / 3, tFrame = cell % 3;
    struct gfClump *clump = clumpGrid[qFrame][tFrame];

    while (clump != NULL)
        {
        struct gfRange *hspRanges = NULL;

        clumpToHspRange(clump, queryT3->trans[qFrame], tileSize, tFrame, NULL, &hspRanges, TRUE, FALSE);
        untranslateRangeList(hspRanges, qFrame, tFrame, t3Hash, NULL, 0);
        ranges = slCat(hspRanges, ranges);
        clump = clump->next;
        }
    }

slSort(&ranges, gfRangeCmpTarget);
ranges = gfRangesBundle(ranges, 2000);

/* One stitched bundle per bundled range. */
for (bundled = ranges; bundled != NULL; bundled = bundled->next)
    {
    struct ssBundle *bun;

    AllocVar(bun);
    bun->qSeq = qSeq;
    bun->genoSeq = bundled->tSeq;
    bun->ffList = gfRangesToFfItem(bundled->components, qSeq);
    ssStitch(bun, stringency, minMatch, ssAliCount);
    slAddHead(&bundles, bun);
    }

/* Release everything that was only needed while searching. */
for (cell = 0; cell < 9; ++cell)
    gfClumpFreeList(&clumpGrid[cell / 3][cell % 3]);
gfRangeFreeList(&ranges);
trans3Free(&queryT3);
lmCleanup(&localMem);

/* Bundles were added at the head, so flip them before returning. */
slReverse(&bundles);
return bundles;
}
/* Example no. 3 */
/* 0 */
static void loadHashT3Ranges(struct gfRange *rangeList, 
	char *tSeqDir, struct hash *tFileCache, int qSeqSize, boolean isRc, 
	struct hash **retT3Hash, struct dnaSeq **retSeqList,
	struct slRef **retT3RefList)
/* For every range in rangeList load its DNA through gfiExpandAndLoadCached
 * (given qSeqSize*3 and usualExpansion), rename the loaded sequence to the
 * range's tName, and build a trans3 from it.
 *   retT3Hash    - receives a new hash keyed by tName.  The first trans3 for
 *                  a name is stored in the hash; later ones with the same
 *                  name are appended after it through its next pointer.
 *   retSeqList   - receives the loaded sequences (each added at the head).
 *   retT3RefList - receives a reference list holding every trans3 created.
 * Each trans3 gets start/end/nibSize copied from the range and isRc from the
 * isRc argument. */
{
struct hash *byName = newHash(10);
struct dnaSeq *loaded = NULL;
struct slRef *refs = NULL;
struct gfRange *r = rangeList;

while (r != NULL)
    {
    struct dnaSeq *dna = gfiExpandAndLoadCached(r, tFileCache,
        tSeqDir, qSeqSize*3, &r->tTotalSize, TRUE, isRc, usualExpansion);
    struct trans3 *fresh, *first;

    slAddHead(&loaded, dna);

    /* Replace whatever name the loader assigned with the range's target name. */
    freez(&dna->name);
    dna->name = cloneString(r->tName);

    fresh = trans3New(dna);
    refAdd(&refs, fresh);
    fresh->start = r->tStart;
    fresh->end = r->tEnd;
    fresh->nibSize = r->tTotalSize;
    fresh->isRc = isRc;

    /* Only the first trans3 of a given name goes into the hash; the rest
     * hang off it as a list. */
    first = hashFindVal(byName, r->tName);
    if (first == NULL)
        {
        hashAdd(byName, r->tName, fresh);
        }
    else
        {
        slAddTail(&first->next, fresh);
        }
    r = r->next;
    }

*retT3Hash = byName;
*retSeqList = loaded;
*retT3RefList = refs;
}
/* Example no. 4 */
/* 0 */
void gfAlignTransTrans(int *pConn, char *tSeqDir, struct dnaSeq *qSeq, 
	boolean qIsRc, int minMatch, struct hash *tFileCache, 
	struct gfOutput *out, boolean isRna)
/* Search an indexed translated genome on the server with a DNA sequence that
 * is translated in three frames, load the matching parts of the genome
 * locally, do the detailed alignment, and hand every bundle to
 * saveAlignments together with 'out'.
 *   pConn      - connection descriptor; it is closed right after the query
 *                and *pConn is set to -1.
 *   tSeqDir    - passed to loadHashT3Ranges for loading target DNA.
 *   qSeq       - query DNA.
 *   qIsRc      - passed through to saveAlignments.
 *   minMatch   - passed to ssStitch and saveAlignments.
 *   tFileCache - passed to loadHashT3Ranges.
 *   out        - output handle; out->includeTargetFile is used when forming
 *                the target name.
 *   isRna      - selects ffCdna stringency when true, ffLoose otherwise.
 * The two target strands (tIsRc 0 and 1) are processed one after the other,
 * each with its own load / align / cleanup cycle. */
{
struct gfClump *clumps[2][3][3];
char targetName[PATH_LEN];
struct gfSeqSource *sourceList = NULL, *source;
struct lm *lm = lmInit(0);
struct trans3 *qTrans = trans3New(qSeq);
struct gfRange *rangeList = NULL;
struct slRef *t3RefList = NULL;
struct hash *t3Hash = NULL;
struct dnaSeq *tSeqList = NULL;
enum ffStringency stringency = (isRna ? ffCdna : ffLoose);
int tileSize;
int tIsRc;

/* Ask the server for clumps; the connection is not needed afterwards. */
gfQuerySeqTransTrans(*pConn, qSeq, clumps, lm, &sourceList, &tileSize);
close(*pConn);
*pConn = -1;

for (tIsRc = 0; tIsRc < 2; ++tIsRc)
    {
    struct gfRange *range;
    struct slRef *ref;
    int cell;

    /* Step 1: work out which ranges have to be loaded, visiting the 3x3
     * (query frame, target frame) grid in row-major order, and load them. */
    for (cell = 0; cell < 9; ++cell)
        {
        int tFrame = cell % 3;
        struct gfRange *coarse = seqClumpToRangeList(clumps[tIsRc][cell / 3][tFrame], tFrame);

        rangeList = slCat(rangeList, coarse);
        }
    rangeCoorTimes3(rangeList);
    slSort(&rangeList, gfRangeCmpTarget);
    rangeList = gfRangesBundle(rangeList, ffIntronMax);
    loadHashT3Ranges(rangeList, tSeqDir, tFileCache,
        qSeq->size/3, tIsRc, &t3Hash, &tSeqList, &t3RefList);

    /* That first range list was only precise enough to get the DNA
     * loaded, so discard it. */
    gfRangeFreeList(&rangeList);

    /* Step 2: point every clump's sequence source at the DNA now in memory
     * and build a new range list by extending the hits in the clumps. */
    for (cell = 0; cell < 9; ++cell)
        {
        int qFrame = cell / 3, tFrame = cell % 3;
        struct gfClump *clump;

        for (clump = clumps[tIsRc][qFrame][tFrame]; clump != NULL; clump = clump->next)
            {
            struct gfSeqSource *clumpSource = clump->target;
            struct gfRange *hspRanges = NULL;
            struct trans3 *loadedT3 = trans3Find(t3Hash, clumpTargetName(clump),
                clump->tStart*3, clump->tEnd*3);

            clumpSource->seq = loadedT3->trans[tFrame];
            clumpSource->start = loadedT3->start/3;
            clumpSource->end = loadedT3->end/3;
            clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame, loadedT3, &hspRanges, TRUE, FALSE);
            untranslateRangeList(hspRanges, qFrame, tFrame, NULL, loadedT3, loadedT3->start);
            rangeList = slCat(hspRanges, rangeList);
            }
        }
    slReverse(&rangeList);
    slSort(&rangeList, gfRangeCmpTarget);
    rangeList = gfRangesBundle(rangeList, ffIntronMax);

    /* Step 3: stitch each bundled range and save the result. */
    for (range = rangeList; range != NULL; range = range->next)
        {
        struct ssBundle *bun;
        struct trans3 *rangeT3;

        AllocVar(bun);
        bun->qSeq = qSeq;
        bun->genoSeq = range->tSeq;
        bun->ffList = gfRangesToFfItem(range->components, qSeq);
        ssStitch(bun, stringency, minMatch, ssAliCount);
        getTargetName(range->tName, out->includeTargetFile, targetName);
        rangeT3 = range->t3;
        saveAlignments(targetName, rangeT3->nibSize, rangeT3->start, 
            bun, NULL, qIsRc, tIsRc, stringency, minMatch, out);
        ssBundleFree(&bun);
        }

    /* Step 4: release everything that belongs to this target strand. */
    gfRangeFreeList(&rangeList);
    freeHash(&t3Hash);
    for (ref = t3RefList; ref != NULL; ref = ref->next)
        {
        struct trans3 *doomed = ref->val;

        trans3Free(&doomed);
        }
    slFreeList(&t3RefList);
    freeDnaSeqList(&tSeqList);
    }

/* Release what was shared by both strands. */
trans3Free(&qTrans);
for (source = sourceList; source != NULL; source = source->next)
    freeMem(source->fileName);
slFreeList(&sourceList);
lmCleanup(&lm);
}