Example #1
0
struct ssBundle *ffSeedExtInMem(struct genoFind *gf, struct dnaSeq *qSeq, Bits *qMaskBits, 
	int qOffset, struct lm *lm, int minScore, boolean isRc)
/* Do seed and extend type alignment */
{
struct ssBundle *bunList = NULL, *bun;
int hitCount;
struct gfClump *clumpList, *clump;
struct gfRange *rangeList = NULL, *range;
struct dnaSeq *tSeq;

clumpList = gfFindClumpsWithQmask(gf, qSeq, qMaskBits, qOffset, lm, &hitCount);
for (clump = clumpList; clump != NULL; clump = clump->next)
    clumpToExactRange(clump, qSeq, gf->tileSize, 0, NULL, &rangeList);
slSort(&rangeList, gfRangeCmpTarget);
rangeList = gfRangesBundle(rangeList, ffIntronMax);
for (range = rangeList; range != NULL; range = range->next)
    {
    range->qStart += qOffset;
    range->qEnd += qOffset;
    tSeq = range->tSeq;
    AllocVar(bun);
    bun->qSeq = qSeq;
    bun->genoSeq = tSeq;
    bun->ffList = gfRangesToFfItem(range->components, qSeq);
    bun->isProt = FALSE;
    bun->avoidFuzzyFindKludge = TRUE;
    ssStitch(bun, ffCdna, 16, 10);
    refineBundle(gf, qSeq, qMaskBits, qOffset, tSeq, lm, bun, isRc);
    slAddHead(&bunList, bun);
    }
gfRangeFreeList(&rangeList);
gfClumpFreeList(&clumpList);
return bunList;
}
void searchOneProt(aaSeq *seq, struct genoFind *gf, FILE *f)
/* Search for protein seq in index and write results to psl. */
{
int hitCount;
struct lm *lm = lmInit(0);
struct gfClump *clumpList = gfFindClumps(gf, seq, lm, &hitCount);
gfAlignAaClumps(gf, clumpList, seq, FALSE, minScore, gvo);
gfClumpFreeList(&clumpList);
lmCleanup(&lm);
}
Example #3
0
static struct ssBundle *gfTransTransFindBundles(struct genoFind *gfs[3], struct dnaSeq *qSeq, 
	struct hash *t3Hash, boolean isRc, int minMatch, boolean isRna)
/* Look for alignment to three translations of qSeq in three translated reading frames. 
 * Save alignment via outFunction/outData. */
{
struct trans3 *qTrans = trans3New(qSeq);
int qFrame, tFrame;
struct gfClump *clumps[3][3], *clump;
struct gfRange *rangeList = NULL, *range;
int tileSize = gfs[0]->tileSize;
bioSeq *targetSeq;
struct ssBundle *bun, *bunList = NULL;
int hitCount;
struct lm *lm = lmInit(0);
enum ffStringency stringency = (isRna ? ffCdna : ffLoose);

gfTransTransFindClumps(gfs, qTrans->trans, clumps, lm, &hitCount);
for (qFrame = 0; qFrame<3; ++qFrame)
    {
    for (tFrame=0; tFrame<3; ++tFrame)
	{
	for (clump = clumps[qFrame][tFrame]; clump != NULL; clump = clump->next)
	    {
	    struct gfRange *rangeSet = NULL;
	    clumpToHspRange(clump, qTrans->trans[qFrame], tileSize, tFrame, NULL, &rangeSet, TRUE, FALSE);
	    untranslateRangeList(rangeSet, qFrame, tFrame, t3Hash, NULL, 0);
	    rangeList = slCat(rangeSet, rangeList);
	    }
	}
    }
slSort(&rangeList, gfRangeCmpTarget);
rangeList = gfRangesBundle(rangeList, 2000);
for (range = rangeList; range != NULL; range = range->next)
    {
    targetSeq = range->tSeq;
    AllocVar(bun);
    bun->qSeq = qSeq;
    bun->genoSeq = targetSeq;
    bun->ffList = gfRangesToFfItem(range->components, qSeq);
    ssStitch(bun, stringency, minMatch, ssAliCount);
    slAddHead(&bunList, bun);
    }
for (qFrame = 0; qFrame<3; ++qFrame)
    for (tFrame=0; tFrame<3; ++tFrame)
	gfClumpFreeList(&clumps[qFrame][tFrame]);
gfRangeFreeList(&rangeList);
trans3Free(&qTrans);
lmCleanup(&lm);
slReverse(&bunList);
return bunList;
}
Example #4
0
void gfFindAlignAaTrans(struct genoFind *gfs[3], aaSeq *qSeq, struct hash *t3Hash, 
	boolean tIsRc, int minMatch, struct gfOutput *out)
/* Look for qSeq alignment in three translated reading frames. Save alignment
 * via outFunction/outData. */
{
struct gfClump *clumps[3];
int frame;
struct gfClump *clump;
struct gfRange *rangeList = NULL, *range;
aaSeq *targetSeq;
struct ssBundle *bun;
int tileSize = gfs[0]->tileSize;
struct trans3 *t3;
int hitCount;
struct lm *lm = lmInit(0);

gfTransFindClumps(gfs, qSeq, clumps, lm, &hitCount);
for (frame=0; frame<3; ++frame)
    {
    for (clump = clumps[frame]; clump != NULL; clump = clump->next)
	{
	clumpToHspRange(clump, qSeq, tileSize, frame, NULL, &rangeList, TRUE, FALSE);
	}
    }
slReverse(&rangeList);
slSort(&rangeList, gfRangeCmpTarget);
rangeList = gfRangesBundle(rangeList, ffIntronMax/3);
for (range = rangeList; range != NULL; range = range->next)
    {
    targetSeq = range->tSeq;
    t3 = hashMustFindVal(t3Hash, targetSeq->name);
    AllocVar(bun);
    bun->qSeq = qSeq;
    bun->genoSeq = targetSeq;
    bun->ffList = gfRangesToFfItem(range->components, qSeq);
    bun->isProt = TRUE;
    bun->t3List = t3;
    ssStitch(bun, ffCdna, minMatch, ssAliCount);
    saveAlignments(targetSeq->name, t3->seq->size, 0, 
	bun, t3Hash, FALSE, tIsRc, ffCdna, minMatch, out);
    ssBundleFree(&bun);
    }
gfRangeFreeList(&rangeList);
for (frame=0; frame<3; ++frame)
    gfClumpFreeList(&clumps[frame]);
lmCleanup(&lm);
}
Example #5
0
void gfLongDnaInMem(struct dnaSeq *query, struct genoFind *gf, 
   boolean isRc, int minScore, Bits *qMaskBits, 
   struct gfOutput *out, boolean fastMap, boolean band)
/* Chop up query into pieces, align each, and stitch back
 * together again. */
{
int hitCount;
int maxSize = MAXSINGLEPIECESIZE;
int preferredSize = 4500;
int overlapSize = 250;
struct dnaSeq subQuery = *query;
struct lm *lm = lmInit(0);
int subOffset, subSize, nextOffset;
DNA saveEnd, *endPos;
struct ssBundle *oneBunList = NULL, *bigBunList = NULL, *bun;
struct hash *bunHash = newHash(8);

for (subOffset = 0; subOffset<query->size; subOffset = nextOffset)
    {
    struct gfClump *clumpList;
    struct gfRange *rangeList = NULL;

    /* Figure out size of this piece.  If query is
     * maxSize or less do it all.   Otherwise just
     * do prefered size, and set it up to overlap
     * with surrounding pieces by overlapSize.  */
    if (subOffset == 0 && query->size <= maxSize)
	nextOffset = subSize = query->size;
    else
        {
	subSize = preferredSize;
	if (subSize + subOffset >= query->size)
	    {
	    subSize = query->size - subOffset;
	    nextOffset = query->size;
	    }
	else
	    {
	    nextOffset = subOffset + preferredSize - overlapSize;
	    }
	}
    subQuery.dna = query->dna + subOffset;
    subQuery.size = subSize;
    endPos = &subQuery.dna[subSize];
    saveEnd = *endPos;
    *endPos = 0;
    if (band)
	{
	oneBunList = ffSeedExtInMem(gf, &subQuery, qMaskBits, subOffset, lm, minScore, isRc);
	}
    else
	{
	clumpList = gfFindClumpsWithQmask(gf, &subQuery, qMaskBits, subOffset, lm, &hitCount);
	if (fastMap)
	    {
	    oneBunList = fastMapClumpsToBundles(gf, clumpList, &subQuery);
	    }
	else
	    {
	    oneBunList = gfClumpsToBundles(clumpList, isRc, &subQuery, minScore, &rangeList);
	    gfRangeFreeList(&rangeList);
	    }
	gfClumpFreeList(&clumpList);
	}
    addToBigBundleList(&oneBunList, bunHash, &bigBunList, query);
    *endPos = saveEnd;
    }
#ifdef DEBUG
dumpBunList(bigBunList);
#endif /* DEBUG */
for (bun = bigBunList; bun != NULL; bun = bun->next)
    {
    ssStitch(bun, ffCdna, minScore, ssAliCount);
    if (!fastMap && !band)
	refineSmallExonsInBundle(bun);
    saveAlignments(bun->genoSeq->name, bun->genoSeq->size, 0, 
	bun, NULL, isRc, FALSE, ffCdna, minScore, out);
    }
ssBundleFreeList(&bigBunList);
freeHash(&bunHash);
lmCleanup(&lm);
}
Example #6
0
void gfAlignTrans(int *pConn, char *tSeqDir, aaSeq *seq, int minMatch, 
    struct hash *tFileCache, struct gfOutput *out)
/* Search indexed translated genome on server with an amino acid sequence. 
 * Then load homologous bits of genome locally and do detailed alignment.
 * Call 'outFunction' with each alignment that is found. */
{
struct ssBundle *bun;
struct gfClump *clumps[2][3], *clump;
struct gfRange *rangeList = NULL, *range, *rl;
struct dnaSeq *targetSeq, *tSeqList = NULL;
char targetName[PATH_LEN];
int tileSize;
int frame, isRc = 0;
struct hash *t3Hash = NULL;
struct slRef *t3RefList = NULL, *ref;
struct gfSeqSource *ssList = NULL, *ss;
struct trans3 *t3;
struct lm *lm = lmInit(0);

/* Get clumps from server. */
gfQuerySeqTrans(*pConn, seq, clumps, lm, &ssList, &tileSize);
close(*pConn);
*pConn = -1;

for (isRc = 0; isRc <= 1;  ++isRc)
    {
    /* Figure out which parts of sequence we need to load. */
    for (frame = 0; frame < 3; ++frame)
	{
	rl = seqClumpToRangeList(clumps[isRc][frame], frame);
	rangeList = slCat(rangeList, rl);
	}
    /* Convert from amino acid to nucleotide coordinates. */
    rangeCoorTimes3(rangeList);
    slSort(&rangeList, gfRangeCmpTarget);
    rangeList = gfRangesBundle(rangeList, ffIntronMax);
    loadHashT3Ranges(rangeList, tSeqDir, tFileCache, seq->size, 
    	isRc, &t3Hash, &tSeqList, &t3RefList);

    /* The old range list was not very precise - it was just to get
     * the DNA loaded.  */
    gfRangeFreeList(&rangeList);


    /* Patch up clump list and associated sequence source to refer
     * to bits of genome loaded into memory.  Create new range list
     * by extending hits in clumps. */
    for (frame = 0; frame < 3; ++frame)
	{
	for (clump = clumps[isRc][frame]; clump != NULL; clump = clump->next)
	    {
	    struct gfSeqSource *ss = clump->target;
	    t3 = trans3Find(t3Hash, clumpTargetName(clump), clump->tStart*3, clump->tEnd*3);
	    ss->seq = t3->trans[frame];
	    ss->start = t3->start/3;
	    ss->end = t3->end/3;
	    clumpToHspRange(clump, seq, tileSize, frame, t3, &rangeList, TRUE, FALSE);
	    }
	}
    slReverse(&rangeList);
    slSort(&rangeList, gfRangeCmpTarget);
    rangeList = gfRangesBundle(rangeList, ffIntronMax/3);

    /* Do detailed alignment of each of the clustered ranges. */
    for (range = rangeList; range != NULL; range = range->next)
	{
	targetSeq = range->tSeq;
	AllocVar(bun);
	bun->qSeq = seq;
	bun->genoSeq = targetSeq;
	bun->ffList = gfRangesToFfItem(range->components, seq);
	bun->isProt = TRUE;
	t3 = hashMustFindVal(t3Hash, range->tName);
	bun->t3List = t3;
	ssStitch(bun, ffCdna, minMatch, ssAliCount);
	getTargetName(range->tName, out->includeTargetFile, targetName);
	saveAlignments(targetName, t3->nibSize, 0, 
	    bun, t3Hash, FALSE, isRc, ffCdna, minMatch, out);
	ssBundleFree(&bun);
	}

    /* Cleanup for this strand of database. */
    gfRangeFreeList(&rangeList);
    freeHash(&t3Hash);
    for (ref = t3RefList; ref != NULL; ref = ref->next)
        {
	struct trans3 *t3 = ref->val;
	trans3Free(&t3);
	}
    slFreeList(&t3RefList);
    freeDnaSeqList(&tSeqList);
    }

/* Final cleanup. */
for (isRc=0; isRc<=1; ++isRc)
    for (frame=0; frame<3; ++frame)
	gfClumpFreeList(&clumps[isRc][frame]);
for (ss = ssList; ss != NULL; ss = ss->next)
    freeMem(ss->fileName);
slFreeList(&ssList);
lmCleanup(&lm);
}