コード例 #1
0
ファイル: chkGlue.c プロジェクト: davidhoover/kent
boolean fastFind(DNA *needle, int needleSize, 
    struct patSpace *ps, struct ffAli **retAli, boolean *retRc, int *retScore)
/* Align needle against the pattern space, once as given and once after
 * calling reverseComplement on it, collecting every ffFind alignment.
 * The collected alignments are sorted with cmpAliList and the first one
 * is handed back through *retAli, with its strand flag in *retRc and its
 * ffScoreCdna score in *retScore; all the others are freed here.
 * Returns TRUE when an alignment was found, FALSE (outputs untouched)
 * otherwise.  reverseComplement is applied to needle a second time after
 * the second pass, so the caller's buffer is flipped back before return. */
{
struct aliList *hits = NULL, *hit;
boolean strand;

for (strand = 0; strand <= 1; ++strand)
    {
    struct patClump *clumps, *cl;

    if (strand)
        reverseComplement(needle, needleSize);
    clumps = patSpaceFindOne(ps, needle, needleSize);
    if (clumps != NULL)
        {
        for (cl = clumps; cl != NULL; cl = cl->next)
            {
            /* Restrict the alignment to the region the clump covers. */
            DNA *regionStart = cl->seq->dna + cl->start;
            struct ffAli *found = ffFind(needle, needle+needleSize, 
                regionStart, regionStart + cl->size, ffCdna);
            if (found == NULL)
                continue;
            AllocVar(hit);
            hit->ali = found;
            hit->score = ffScoreCdna(found);
            hit->isRc = strand;
            slAddHead(&hits, hit);
            }
        slFreeList(&clumps);
        }
    if (strand)
        reverseComplement(needle, needleSize);
    }

if (hits == NULL)
    return FALSE;

slSort(&hits, cmpAliList);
/* Transfer ownership of the winning alignment to the caller; clearing the
 * pointer in the list node records that the list no longer owns it. */
*retAli = hits->ali;
hits->ali = NULL;
*retRc = hits->isRc;
*retScore = hits->score;
for (hit = hits->next; hit != NULL; hit = hit->next)
    ffFreeAli(&hit->ali);
slFreeList(&hits);
return TRUE;
}
コード例 #2
0
ファイル: gfBlatLib.c プロジェクト: davidhoover/kent
static boolean alignComponents(struct gfRange *combined, struct ssBundle *bun, 
	enum ffStringency stringency)
/* Run ffFind over a padded window around each range in
 * combined->components and push every alignment found onto the head of
 * bun->ffList.  Returns TRUE if at least one alignment was added. */
{
const int pad = 250;
struct dnaSeq *query = bun->qSeq, *target = bun->genoSeq;
struct gfRange *comp;
boolean found = FALSE;

for (comp = combined->components; comp != NULL; comp = comp->next)
    {
    /* The first component gets a doubled margin on its start side and
     * the last component a doubled margin on its end side. */
    int startPad = (comp == combined->components) ? 2*pad : pad;
    int endPad = (comp->next == NULL) ? 2*pad : pad;
    int qs = comp->qStart - startPad;
    int ts = comp->tStart - startPad;
    int qe = comp->qEnd + endPad;
    int te = comp->tEnd + endPad;
    struct ffAli *hit;

    /* Keep the window inside the combined range. */
    if (qs < combined->qStart)
	qs = combined->qStart;
    if (ts < combined->tStart)
	ts = combined->tStart;
    if (qe > combined->qEnd)
	qe = combined->qEnd;
    if (te > combined->tEnd)
	te = combined->tEnd;

    /* Target offsets are taken relative to combined->tStart when
     * indexing into the bundle's genoSeq. */
    hit = ffFind(query->dna + qs, query->dna + qe,
		 target->dna + (ts - combined->tStart),
		 target->dna + (te - combined->tStart),
		 stringency);
    if (hit == NULL)
	continue;

    {
    struct ssFfItem *item;
    AllocVar(item);
    item->ff = hit;
    slAddHead(&bun->ffList, item);
    found = TRUE;
    }
    }
return found;
}
コード例 #3
0
ファイル: supStitch.c プロジェクト: davidhoover/kent
struct ffAli *smallMiddleExons(struct ffAli *aliList, 
	struct ssBundle *bundle, 
	enum ffStringency stringency)
/* Look for small exons in the middle.  For each pair of neighbouring
 * blocks that are at least 3 apart in both the n and h coordinates, run
 * ffFind on the gap between them and splice whatever it finds (after
 * forceMonotonic) into the list between the two blocks.
 * Returns the head of the list, which is always the aliList passed in,
 * since new blocks are only ever inserted after an existing block.
 * An empty list, or a bundle that has a t3List, is returned unchanged. */
{
struct dnaSeq *qSeq, *genoSeq;
struct ffAli *left, *right, *newLeft, *newRight;

if (aliList == NULL)
    return NULL;	/* Nothing to scan; avoids dereferencing aliList->right. */
if (bundle->t3List != NULL)
    return aliList;	/* Can't handle intense translated stuff. */

qSeq = bundle->qSeq;
genoSeq = bundle->genoSeq;

left = aliList;
for (right = aliList->right; right != NULL; right = right->right)
    {
    if (right->hStart - left->hEnd >= 3 && right->nStart - left->nEnd >= 3)
	{
	newLeft = ffFind(left->nEnd, right->nStart, left->hEnd, right->hStart, stringency);
	if (newLeft != NULL)
	    {
	    newLeft = forceMonotonic(newLeft, qSeq, genoSeq, 
		stringency, bundle->isProt, bundle->t3List );
	    newRight = ffRightmost(newLeft);
	    /* Both left and right are guaranteed non-NULL here (left starts
	     * at the list head and then trails right, and right is checked
	     * by the loop condition), so the splice is unconditional. */
	    left->right = newLeft;
	    newLeft->left = left;
	    right->left = newRight;
	    newRight->right = right;
	    }
	}
    left = right;
    }
return aliList;
}
コード例 #4
0
ファイル: patSpace.c プロジェクト: davidhoover/kent
void writeClump(struct blockPos *first, struct blockPos *last,
    char *cdnaName, char strand, char dir, DNA *cdna, int cdnaSize, struct cdnaAliList **pList)
/* Write hitOut one clump.
 * Aligns cdna with ffFind against the part of first->seq running from
 * first->offset to last->offset+last->size, widened by minMatch*patSize
 * on each side and clamped to the sequence bounds.  When the alignment
 * scores at least 22, solidMatch accepts it and the solid score per base
 * exceeds 0.25, a hit line is written to hitOut and a new cdnaAliList
 * entry is pushed onto the head of *pList.  When dumpMe is set, extra
 * diagnostics go to dumpOut and the two HTML files.
 * Updates the ffSubmitted/ffAccepted/ffOkScore/ffSolidMatch counters. */
{
struct dnaSeq *seq = first->seq;
char *bacName = seq->name;
int seqIx = first->seqIx;
int start = first->offset;
int end = last->offset+last->size;
struct ffAli *ff, *left, *right;
int extraAtEnds = minMatch*patSize;
struct cdnaAliList *cal;

/* Pad the search window, keeping it inside the sequence. */
start -= extraAtEnds;
if (start < 0)
    start = 0;
end += extraAtEnds;
if (end >seq->size)
    end = seq->size;

++ffSubmitted;
if (dumpMe)
    fprintf(dumpOut, "%s %d %s %d-%d\n", cdnaName, cdnaSize, bacName, start, end);
ff = ffFind(cdna, cdna+cdnaSize, seq->dna+start, seq->dna+end, ffCdna);
if (dumpMe)
    {
    /* %p with a void* cast: passing a pointer to %x is an argument
     * type mismatch and loses bits where pointers are wider than int. */
    fprintf(dumpOut, "ffFind = %p\n", (void *)ff);
    }
if (ff != NULL)
    {
    int ffScore = ffScoreCdna(ff);
    ++ffAccepted;
    if (dumpMe) fprintf(dumpOut, "ffScore = %d\n", ffScore);
    if (ffScore >= 22)
        {
        int hiStart, hiEnd;
        int oldStart, oldEnd;

        /* Remember the untrimmed cDNA extent (old*) before solidMatch
         * gets a chance to adjust left/right and hiStart/hiEnd. */
        ffFindEnds(ff, &left, &right);
        hiStart = oldStart = left->nStart - cdna;
        hiEnd = oldEnd = right->nEnd - cdna;
        ++ffOkScore;

        if (solidMatch(&left, &right, cdna, &hiStart, &hiEnd))
            {
            int solidSize = hiEnd - hiStart;
            int solidScore;
            int seqStart, seqEnd;
            double cookedScore;

            solidScore = scoreCdna(left, right);
            cookedScore = (double)solidScore/solidSize;
            if (cookedScore > 0.25)
                {
                ++ffSolidMatch;

                seqStart = left->hStart - seq->dna;
                seqEnd = right->hEnd - seq->dna;
                fprintf(hitOut, "%3.1f%% %c %s:%d-%d (old %d-%d) of %d at %s.%d:%d-%d\n", 
                    100.0 * cookedScore, strand, cdnaName, 
                    hiStart, hiEnd, oldStart, oldEnd, cdnaSize,
                    bacName, seqIx, seqStart, seqEnd);

                if (dumpMe)
                    {
                    /* Full alignment goes to the big file under anchor
                     * i<htmlIx>; the little file gets a one-line link. */
                    fprintf(bigHtmlFile, "<A NAME=i%d>", htmlIx);
                    fprintf(bigHtmlFile, "<H2>%4.1f%% %4d %4d %c %s:%d-%d of %d at %s.%d:%d-%d</H2><BR>", 
                        100.0 * cookedScore, solidScore, ffScore, strand, cdnaName, 
                        hiStart, hiEnd, cdnaSize,
                        bacName, seqIx, seqStart, seqEnd);
                    fprintf(bigHtmlFile, "</A>");
                    ffShAli(bigHtmlFile, ff, cdnaName, cdna, cdnaSize, 0,
                        bacName, seq->dna+start, end-start, start, FALSE);
                    fprintf(bigHtmlFile, "<BR><BR>\n");

                    fprintf(littleHtmlFile, "<A HREF=\"patAli.html#i%d\">", htmlIx);
                    fprintf(littleHtmlFile, "%4.1f%% %4d %4d %c %s:%d-%d of %d at %s.%d:%d-%d\n", 
                        100.0 * cookedScore, solidScore, ffScore, strand, cdnaName, 
                        hiStart, hiEnd, cdnaSize,
                        bacName, seqIx, seqStart, seqEnd);
                    fprintf(littleHtmlFile, "</A><BR>");
                    ++htmlIx;
                    }

                cal = newCal(first->bacIx, seqIx, hiStart, hiEnd, cdnaSize, strand, dir, cookedScore);
                slAddHead(pList, cal);
                }
            }
        }
    ffFreeAli(&ff);
    }
}
コード例 #5
0
void glueFindOne(struct patSpace *ps, DNA *cdna, int cdnaSize, 
    char strand, char dir, char *cdnaName, struct cdnaAliList **pList)
/* Look cdna up in the pattern space and align it with ffFind against
 * every clump returned.  An alignment that scores at least 22, is
 * accepted by solidMatch and has a solid score per base above 0.25 is
 * reported as one line on hitOut and pushed onto the head of *pList as a
 * new cdnaAliList entry.  Updates the ffSubmitted/ffAccepted/ffOkScore/
 * ffSolidMatch counters along the way. */
{
struct patClump *clumps = patSpaceFindOne(ps, cdna, cdnaSize);
struct patClump *cl;

for (cl = clumps; cl != NULL; cl = cl->next)
    {
    struct dnaSeq *target = cl->seq;
    DNA *regionStart = target->dna + cl->start;
    struct ffAli *ali;
    int rawScore;

    ++ffSubmitted;
    ali = ffFind(cdna, cdna+cdnaSize, regionStart, regionStart + cl->size, ffCdna);
    if (ali == NULL)
        continue;

    rawScore = ffScoreCdna(ali);
    ++ffAccepted;
    if (rawScore >= 22)
        {
        struct ffAli *leftEnd, *rightEnd;
        int rawStart, rawEnd;       /* cDNA extent before solidMatch */
        int solidStart, solidEnd;   /* cDNA extent solidMatch may adjust */

        ffFindEnds(ali, &leftEnd, &rightEnd);
        rawStart = leftEnd->nStart - cdna;
        rawEnd = rightEnd->nEnd - cdna;
        solidStart = rawStart;
        solidEnd = rawEnd;
        ++ffOkScore;

        if (solidMatch(&leftEnd, &rightEnd, cdna, &solidStart, &solidEnd))
            {
            int span = solidEnd - solidStart;
            int solidScore = scoreCdna(leftEnd, rightEnd);
            double perBase = (double)solidScore/span;

            if (perBase > 0.25)
                {
                int seqIx = cl->seqIx;
                int bacIx = cl->bacIx;
                int hitStart = leftEnd->hStart - target->dna;
                int hitEnd = rightEnd->hEnd - target->dna;
                struct cdnaAliList *entry;

                ++ffSolidMatch;
                fprintf(hitOut, "%3.1f%% %c %s:%d-%d (old %d-%d) of %d at %s.%d:%d-%d\n", 
                    100.0 * perBase, strand, cdnaName, 
                    solidStart, solidEnd, rawStart, rawEnd, cdnaSize,
                    target->name, seqIx, hitStart, hitEnd);

                entry = newCal(bacIx, seqIx, solidStart, solidEnd, cdnaSize, strand, dir, perBase);
                slAddHead(pList, entry);
                }
            }
        }
    ffFreeAli(&ali);
    }
slFreeList(&clumps);
}
コード例 #6
0
ファイル: supStitch.c プロジェクト: davidhoover/kent
struct ssBundle *ssFindBundles(struct patSpace *ps, struct dnaSeq *cSeq, 
	char *cName, enum ffStringency stringency, boolean avoidSelfSelf)
/* Find patSpace alignments.  This routine is used by psLayout but not blat.
 * The query cSeq is searched in overlapping windows: when more than
 * maxSize bases remain a window of preferredSize is used, otherwise the
 * whole remainder; consecutive windows overlap by overlapSize.  Each
 * ffFind alignment is filed in the bundle for its target sequence (one
 * bundle per target, created on first use), and every bundle is then
 * passed through ssStitch.  With avoidSelfSelf set, clumps whose
 * sequence name equals cSeq->name are skipped.
 * Returns the list of bundles, or NULL if nothing aligned.  cName is not
 * used by this routine. */
{
struct patClump *clumpList, *clump;
struct ssBundle *bundleList = NULL, *bun = NULL;
DNA *cdna = cSeq->dna;
int totalCdnaSize = cSeq->size;
DNA *endCdna = cdna+totalCdnaSize;
struct ssFfItem *ffl;
struct dnaSeq *lastSeq = NULL;
int maxSize = 700;
int preferredSize = 500;
int overlapSize = 250;

for (;;)
    {
    int cSize = endCdna - cdna;
    if (cSize > maxSize)
	cSize = preferredSize;
    clumpList = patSpaceFindOne(ps, cdna, cSize);
    for (clump = clumpList; clump != NULL; clump = clump->next)
	{
	struct ffAli *ff;
	struct dnaSeq *seq = clump->seq;
	DNA *tStart = seq->dna + clump->start;
	if (!avoidSelfSelf || !sameString(seq->name, cSeq->name))
	    {
	    ff = ffFind(cdna, cdna+cSize, tStart, tStart + clump->size, stringency);
	    if (ff != NULL)
		{
		/* lastSeq caches the target of the current bundle so that
		 * runs of hits on one target skip the findBundle lookup. */
		if (lastSeq != seq)
		    {
		    lastSeq = seq;
		    if ((bun = findBundle(bundleList, seq)) == NULL)
			{
			AllocVar(bun);
			bun->qSeq = cSeq;
			bun->genoSeq = seq;
			bun->genoIx = clump->bacIx;
			bun->genoContigIx = clump->seqIx;
			slAddHead(&bundleList, bun);
			}
		    }
		AllocVar(ffl);
		ffl->ff = ff;
		slAddHead(&bun->ffList, ffl);
		}
	    }
	}
    /* Release this window's clumps before deciding whether to stop, so
     * the list for the final window is freed as well. */
    slFreeList(&clumpList);
    cdna += cSize;
    if (cdna >= endCdna)
	break;
    cdna -= overlapSize;
    }
slReverse(&bundleList);

for (bun = bundleList; bun != NULL; bun = bun->next)
    {
    ssStitch(bun, stringency, 20, 16);
    }
return bundleList;
}
コード例 #7
0
ファイル: forAg.c プロジェクト: ucsc-mus-strain-cactus/kent
int main(int argc, char *argv[])
/* Align every EST in a fasta file against a genomic target.
 * Command line: argv[1] is the EST file, argv[2] the target sequence
 * file and argv[3] the ooc file name handed to makePatSpace.  Each EST is
 * searched as read and again after reverseComplement; every clump found
 * by patSpaceFindOne is aligned with ffFind and the blocks are printed to
 * stdout.  Progress and diagnostics go to stderr. */
{
    char *estName, *targetName, *oocName;
    FILE *estFile;
    struct dnaSeq *target;
    struct dnaSeq *est;
    struct patSpace *ps;
    struct patClump *clumpList, *clump;
    int estIx = 0;

    /* Check command line arguments and assign to local variables. */
    if (argc != 4)
        usage();
    estName = argv[1];
    estFile = mustOpen(estName, "rb");
    targetName = argv[2];
    oocName = argv[3];

    /* Read in target DNA from fasta files and check not too big. */
    fprintf(stderr, "Reading %s\n", targetName);
    target = faReadAllDna(targetName);
    if (totalSequenceSize(target) > 8000000)
    {
        errAbort("Can only handle 8000000 bases of genomic sequence at once, %s has %d.",
                 targetName, totalSequenceSize(target));
    }

    /* Make a pattern space index structure. */
    fprintf(stderr, "Making Pattern Space index\n");
    ps = makePatSpace(&target, 1, oocName, 4, 32000);

    /* Loop through each EST in query list. */
    printf("Searching for hits\n\n");
    while (faReadNext(estFile, NULL, TRUE, NULL, &est))
    {
        boolean isRc;   /* Reverse complemented? */

        if (++estIx % 5000 == 0)
            fprintf(stderr, "Processing EST %d\n", estIx);
        if (est->size > 20000)
        {
            warn("Very large EST sequence %s.\n"
                 "Maybe you mixed up the EST and genomic parameters?", est->name);
            usage();
        }

        for (isRc = 0; isRc <= 1; ++isRc)   /* Search both strands. */
        {
            if (isRc)
                reverseComplement(est->dna, est->size);
            clumpList = patSpaceFindOne(ps, est->dna, est->size);

            /* For each homology clump patSpace finds, do a fuzzyFinder
             * alignment of it and print the results. */
            for (clump = clumpList; clump != NULL; clump = clump->next)
            {
                /* No local isRc here: a second declaration would shadow
                 * the strand loop variable and be read uninitialised. */
                struct ffAli *ali, *a;
                int score;
                struct dnaSeq *t = clump->seq;
                DNA *tStart = t->dna + clump->start;

                ali = ffFind(est->dna, est->dna+est->size, tStart, tStart + clump->size, ffCdna);
                if (ali != NULL)
                {
                    score = ffScoreCdna(ali);
                    /* NOTE(review): '+' is printed for the reverse-complemented
                     * pass and '-' for the forward pass, which looks inverted;
                     * kept as written - confirm the intended polarity. */
                    printf("%s hits %s strand %c score %d\n",
                           est->name, t->name, (isRc ? '+' : '-'), score);
                    for (a = ali; a != NULL; a = a->right)
                    {
                        /* Pointer differences are ptrdiff_t; cast to int
                         * so the arguments match the %d conversions. */
                        printf("  Q %4d - %4d\t T %4d -%4d\n",
                               (int)(a->nStart - est->dna), (int)(a->nEnd - est->dna),
                               (int)(a->hStart - t->dna), (int)(a->hEnd - t->dna));
                    }
                    printf("\n");
                    ffFreeAli(&ali);
                }
                else
                {
                    printf("Couldn't align clump at %s %d-%d\n",
                           t->name, clump->start, clump->start + clump->size);
                }
            }
            slFreeList(&clumpList);
        }
        freeDnaSeq(&est);
    }
    /* Clean up time. */
    freePatSpace(&ps);
    freeSeqList(&target);
    return 0;
}