Beispiel #1
0
void writePslFrags(struct psl *psl, FILE *f)
/* Write every block of psl that is at least 30 bases long to f as its own
 * single-block psl record.  The parent's match, misMatch, repMatch and nCount
 * are scaled by blockSize/totalSize for each fragment.  Shorter blocks are
 * skipped. */
{
int i;
int totalSize = pslBlockTotalSize(psl);   /* Divisor used when scaling counts. */

/* Without any aligned bases there is nothing to scale by.  Bail out rather
 * than pass a zero divisor (the third argument) to roundingScale. */
if (totalSize <= 0)
    return;

for (i=0; i<psl->blockCount; ++i)
    {
    int size = psl->blockSizes[i];
    if (size >= 30)
        {
        /* Static storage starts out zeroed, so the fields that are never
         * assigned below stay zero in the record that gets written. */
        static struct psl p;
        /* Single-element backing store for the fragment's block arrays. */
        unsigned blockSizes, qStarts, tStarts;

        /* Apportion the parent's counts to this block. */
        p.match = roundingScale(psl->match, size, totalSize);
        p.misMatch = roundingScale(psl->misMatch, size, totalSize);
        p.repMatch = roundingScale(psl->repMatch, size, totalSize);
        p.nCount = roundingScale(psl->nCount, size, totalSize);

        /* Identity of query and target is copied straight from the parent. */
        p.strand[0] = psl->strand[0];
        p.strand[1] = psl->strand[1];
        p.qName = psl->qName;
        p.qSize = psl->qSize;
        p.tName = psl->tName;
        p.tSize = psl->tSize;

        /* The fragment consists of exactly this one block. */
        p.blockCount = 1;
        p.blockSizes = &blockSizes;
        blockSizes = size;
        p.qStarts = &qStarts;
        qStarts = psl->qStarts[i];
        p.tStarts = &tStarts;
        tStarts = psl->tStarts[i];

        /* On a '-' strand the start is measured back from the sequence end;
         * otherwise the block start is used as is. */
        if (p.strand[0] == '-')
            p.qStart = psl->qSize - (qStarts + size);
        else
            p.qStart = qStarts;
        if (p.strand[1] == '-')
            p.tStart = psl->tSize - (tStarts + size);
        else
            p.tStart = tStarts;
        p.qEnd = p.qStart + size;
        p.tEnd = p.tStart + size;
        pslTabOut(&p, f);
        }
    }
}
void pslMrnaCover(char *pslFile, char *faFile)
/* pslMrnaCover - Make histogram of coverage percentage of mRNA in psl.
 *   pslFile - alignments to scan.
 *   faFile  - sequences that the alignments' query names must be found in.
 * For every sequence the largest match+repMatch+misMatch over its alignments
 * is kept, turned into a percentage of the sequence size and tallied.  The
 * histogram is printed to stdout.  If the file-level listZero is set, the
 * names and sizes of sequences with no aligned bases are written there.
 * Aborts if a psl query is missing from faFile or its size disagrees. */
{
/* Static, so the counts start at zero (and carry over between calls). */
static int histogram[101];
int i;
int qAli;
struct hash *hash;
struct rnaCover *rcList = NULL, *rc;
struct lineFile *lf = pslFileOpen(pslFile);
struct psl *psl;

/* Build up list of all sequences. */
readFa(faFile, &rcList, &hash);

/* Scan psls and see maximum amount each is aligned. */
while ((psl = pslNext(lf)) != NULL)
    {
    if (psl->qSize >= minSize)
        {
        if ((rc = hashFindVal(hash, psl->qName)) == NULL)
            errAbort("%s is in %s but not %s", psl->qName, pslFile, faFile);
        if (rc->qSize != psl->qSize)
            errAbort("%s is %d bytes in %s but %d in %s", psl->qName,
                rc->qSize, faFile, psl->qSize, pslFile);
        qAli = psl->match + psl->repMatch + psl->misMatch;
        if (qAli > rc->qMaxAli)
           rc->qMaxAli = qAli;
        }
    pslFree(&psl);
    }
lineFileClose(&lf);

/* Write out the non-aligners, if requested. */
if (listZero != NULL)
    {
    FILE *f = mustOpen(listZero, "w");
    for (rc = rcList; rc != NULL; rc = rc->next)
        {
        if (rc->qMaxAli == 0)
            fprintf(f, "%s\t%d\n", rc->name, rc->qSize);
        }
    /* Close explicitly so the handle is not leaked and a failed flush of
     * buffered output is reported instead of being lost. */
    if (fclose(f) != 0)
        errAbort("Error closing %s", listZero);
    }

/* Tally up percentage aligning in histogram. */
for (rc = rcList; rc != NULL; rc = rc->next)
    {
    int histIx = 0;
    /* A zero-length sequence has no meaningful percentage; count it as 0%
     * instead of dividing by zero. */
    if (rc->qSize > 0)
        histIx = roundingScale(100, rc->qMaxAli, rc->qSize);
    assert(histIx >= 0 && histIx <= 100);
    histogram[histIx] += 1;
    }

/* Print out histogram. */
for (i=0; i<=100; ++i)
    {
    printf("%3d%% %6d\n", i, histogram[i]);
    }
}
Beispiel #3
0
boolean detailTest(struct psl *psl)
/* Detailed pass/fail test.
 * Computes a "badness" figure from the mismatch count, the number of
 * inserts on either side and twice the natural log of (1 + inserted bases),
 * expresses it in parts per thousand of the total block size, and returns
 * FALSE when that is below 85, TRUE otherwise.  An alignment with no
 * aligned bases returns FALSE.
 * NOTE(review): rejecting the *low* badness values looks inverted for a
 * quality filter - confirm the intended direction against the caller. */
{
int size = pslBlockTotalSize(psl);
int badFactor;
int milliBad;

/* No aligned bases: the ratio is undefined, so fail rather than divide by 0. */
if (size <= 0)
    return FALSE;

badFactor = psl->misMatch + psl->tNumInsert + psl->qNumInsert + 2*log(1+psl->tBaseInsert + psl->qBaseInsert);
milliBad = roundingScale(1000, badFactor, size);

if (milliBad < 85)
     return FALSE;
return TRUE;
}
Beispiel #4
0
void aliTrack(char *bacAcc, char *wholeName, char *partsName, 
    struct memGfx *mg, int x, int y, FILE *mapFile, int trim, char *repeatMask)
/* Write out one alignment track.
 *   bacAcc     - accession put into the generated links and map boxes.
 *   wholeName  - fasta file with the sequence to align against (only the
 *                first record is used).
 *   partsName  - fasta file with the contigs to align.
 *   mg, x, y   - image to draw on and the track's top-left offset.
 *   mapFile    - receives one map box per aligned piece.
 *   trim       - bases skipped at each end of every contig.
 *   repeatMask - passed through into the generated links.
 * Each contig is cut into pieces of at most 5000 bases, each piece is
 * aligned with fastFind, and a line describing the result is printed to
 * stdout.  Aligned pieces are additionally drawn as a labelled box. */
{
struct dnaSeq *whole, *partList, *part;
bits16 contig;
int maxBlockSize = 5000;
int wholeSize;
struct patSpace *ps;
DNA *wholeDna;

whole = faReadAllDna(wholeName);
/* Guard against an empty sequence list, which would otherwise be
 * dereferenced just below. */
if (whole == NULL)
    {
    warn("No sequences in %s", wholeName);
    return;
    }
if (slCount(whole) > 1)
    warn("%d sequences in %s, only using first", slCount(whole), wholeName);
wholeDna = whole->dna;
wholeSize = whole->size;
ps = makePatSpace(&whole, 1, oocFile, 5, 500);
partList = faReadAllDna(partsName);
printf("%d contigs in %s\n\n", slCount(partList), partsName);

for (part = partList, contig = 0; part != NULL; part = part->next, ++contig)
    {
    DNA *dna = part->dna;
    int dnaSize = part->size;
    int start, size;
    int subIx = 0;          /* Index of the piece within this contig. */
    char numText[12];       /* 1-based contig number drawn inside the box. */

    Color color = blockColors[contig%ArraySize(blockColors)];
    snprintf(numText, sizeof(numText), "%d", contig+1);
    /* size is set at the top of the body, before the increment uses it. */
    for (start = trim; start < dnaSize-trim; start += size)
        {
        struct ffAli *left, *right;
        boolean rc;
        int score;

        /* Take what is left before the trailing trim, capped at one block. */
        size = dnaSize - start-trim;
        if (size > maxBlockSize)
            size = maxBlockSize;
        if (!fastFind(dna+start, size, ps, &left, &rc, &score) )
            {
            printf("Contig %d.%d:%d-%d of %d UNALIGNED\n",
                contig+1, subIx, start, start+size, dnaSize);
            }
        else
            {
            int x1, x2;
            int xo, w;
            double quality;
            int qStart, qSize, tStart,tSize;
            char qualityString[40];

            /* Walk to the last block of the alignment. */
            right = left;
            while (right->right != NULL)
                right = right->right;
            qStart = left->nStart - dna;
            qSize = right->nEnd - left->nStart;
            if (rc)
                {
                /* Reverse-complemented hit: recompute the query start from
                 * the alignment end within this piece. */
                int rcEnd = right->nEnd - (dna+start) - 1;
                qStart = reverseOffset(rcEnd, size) + start;
                }
            tStart = left->hStart - wholeDna;
            tSize = right->hEnd - left->hStart;
            quality = 100.0 * score / qSize;
            /* NOTE(review): the cut-off is 25.0 but the label printed below
             * it says "<50%" - one of the two is probably stale.  Behaviour
             * is kept as found. */
            if (quality >= 25.0)
                snprintf(qualityString, sizeof(qualityString), "%4.1f%%", quality);
            else
                snprintf(qualityString, sizeof(qualityString), "<50%%");

            printf("<A HREF=\"../cgi-bin/chkGlue.exe?bacAcc=%s&contig=%d&qStart=%d&qSize=%d&tStart=%d&tSize=%d&repeatMask=%s\">",
                bacAcc, contig, qStart, qSize, tStart, tSize, repeatMask);

            printf("Contig %d.%d:%d-%d %c of %d aligned %d-%d of %d aliSize %d quality %s</A>\n",
                contig+1, subIx, qStart, qStart+qSize, 
                (rc ? '-' : '+'), dnaSize, 
                tStart, tStart + tSize,
                wholeSize,
                qSize, qualityString);

            /* Scale the target extent to track pixels and draw the box. */
            x1 = roundingScale(trackWidth, left->hStart - wholeDna, wholeSize);
            x2 = roundingScale(trackWidth, right->hEnd - wholeDna, wholeSize);
            xo = x1+x;
            w = x2-x1;
            mapWriteBox(mapFile, mtBlock, xo, y, w, trackHeight,
                bacAcc, contig, qStart, qSize, tStart, tSize);
            mgDrawBox(mg, xo, y, w, trackHeight, color);
            mgTextCentered(mg, xo, y, w, trackHeight, MG_WHITE, font, numText);
            ffFreeAli(&left);
            }
        ++subIx;
        }
    }
freePatSpace(&ps);
freeAllSeq(&whole);
freeAllSeq(&partList);
}
Beispiel #5
0
static void savePslx(char *chromName, int chromSize, int chromOffset,
                     struct ffAli *ali, struct dnaSeq *tSeq, struct dnaSeq *qSeq,
                     boolean isRc, enum ffStringency stringency, int minMatch, FILE *f,
                     struct hash *t3Hash, boolean reportTargetStrand, boolean targetIsRc,
                     struct hash *maskHash, int minIdentity,
                     boolean qIsProt, boolean tIsProt, boolean saveSeq)
/* Analyse one alignment and if it looks good enough write it out to file in
 * psl format (or pslX format - if saveSeq is TRUE).
 *   chromName/chromSize/chromOffset - target name and size as reported, and
 *       the offset added to every target position.
 *   ali         - leftmost block of the alignment; blocks chain via ->right.
 *   tSeq, qSeq  - target and query sequences the block pointers refer into.
 *   isRc        - query strand is '-'; query start/end are flipped.
 *   stringency  - with ffCdna, target-side gaps are not penalised.
 *   minMatch    - unused here (score filtering was moved to a higher level).
 *   t3Hash      - optional; looked up by target name and passed to
 *                 trans3GenoPos.
 *   reportTargetStrand/targetIsRc - whether to print a second strand char,
 *                 and whether target start/end are flipped.
 *   maskHash    - optional; bits looked up by target name decide whether a
 *                 match counts as repMatch.
 *   minIdentity - minimum identity, in parts per thousand, needed to output.
 *   qIsProt, tIsProt - not referenced in this function.
 * Aborts on write error. */
{
    /* This function was taken from psLayout and extended slightly to cope
     * with protein as well as DNA alignments. */
    struct ffAli *ff, *nextFf;
    struct ffAli *right = ffRightmost(ali);
    DNA *needle = qSeq->dna;
    DNA *hay = tSeq->dna;
    int nStart = ali->nStart - needle;
    int nEnd = right->nEnd - needle;
    int hStart, hEnd;
    int nInsertBaseCount = 0;
    int nInsertCount = 0;
    int hInsertBaseCount = 0;
    int hInsertCount = 0;
    int matchCount = 0;
    int mismatchCount = 0;
    int repMatch = 0;
    int countNs = 0;
    DNA *np, *hp, n, h;
    int blockSize;
    int i;
    struct trans3 *t3List = NULL;
    Bits *maskBits = NULL;

    if (maskHash != NULL)
        maskBits = hashMustFindVal(maskHash, tSeq->name);
    if (t3Hash != NULL)
        t3List = hashMustFindVal(t3Hash, tSeq->name);
    hStart = trans3GenoPos(ali->hStart, tSeq, t3List, FALSE) + chromOffset;
    hEnd = trans3GenoPos(right->hEnd, tSeq, t3List, TRUE) + chromOffset;

    /* Count up matches, mismatches, inserts, etc. */
    for (ff = ali; ff != NULL; ff = nextFf)
    {
        nextFf = ff->right;
        blockSize = ff->nEnd - ff->nStart;
        np = ff->nStart;
        hp = ff->hStart;
        for (i=0; i<blockSize; ++i)
        {
            n = np[i];
            h = hp[i];
            if (n == 'n' || h == 'n')
                ++countNs;
            else
            {
                if (n == h)
                {
                    /* A match over a masked target position is a repMatch. */
                    if (maskBits != NULL)
                    {
                        int seqOff = hp + i - hay;
                        if (bitReadOne(maskBits, seqOff))
                            ++repMatch;
                        else
                            ++matchCount;
                    }
                    else
                        ++matchCount;
                }
                else
                    ++mismatchCount;
            }
        }
        /* Gap between this block and the next, on each side. */
        if (nextFf != NULL)
        {
            int nhStart = trans3GenoPos(nextFf->hStart, tSeq, t3List, FALSE) + chromOffset;
            int ohEnd = trans3GenoPos(ff->hEnd, tSeq, t3List, TRUE) + chromOffset;
            int hGap = nhStart - ohEnd;
            int nGap = nextFf->nStart - ff->nEnd;

            if (nGap != 0)
            {
                ++nInsertCount;
                nInsertBaseCount += nGap;
            }
            if (hGap != 0)
            {
                ++hInsertCount;
                hInsertBaseCount += hGap;
            }
        }
    }


    /* See if it looks good enough to output, and output. */
    /* if (score >= minMatch) Moved to higher level */
    {
        int gaps = nInsertCount + (stringency == ffCdna ? 0: hInsertCount);
        int compared = matchCount + repMatch + mismatchCount;
        /* If every aligned column involved an 'n' there is nothing to take
         * a ratio of; treat identity as 0 instead of dividing by zero. */
        int id = 0;
        if (compared > 0)
            id = roundingScale(1000, matchCount + repMatch - 2*gaps, compared);
        if (id >= minIdentity)
        {
            if (isRc)
            {
                /* Flip the query range end-for-end. */
                int temp;
                int oSize = qSeq->size;
                temp = nStart;
                nStart = oSize - nEnd;
                nEnd = oSize - temp;
            }
            if (targetIsRc)
            {
                /* Flip the target range end-for-end. */
                int temp;
                temp = hStart;
                hStart = chromSize - hEnd;
                hEnd = chromSize - temp;
            }
            /* Counts and strand. */
            fprintf(f, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%c",
                    matchCount, mismatchCount, repMatch, countNs, nInsertCount, nInsertBaseCount, hInsertCount, hInsertBaseCount,
                    (isRc ? '-' : '+'));
            if (reportTargetStrand)
                fprintf(f, "%c", (targetIsRc ? '-' : '+') );
            /* Query and target name/size/range, then the block count. */
            fprintf(f, "\t%s\t%d\t%d\t%d\t"
                    "%s\t%d\t%d\t%d\t%d\t",
                    qSeq->name, qSeq->size, nStart, nEnd,
                    chromName, chromSize, hStart, hEnd,
                    ffAliCount(ali));
            /* Comma-terminated lists: block sizes, query starts, target starts. */
            for (ff = ali; ff != NULL; ff = ff->right)
                fprintf(f, "%ld,", (long)(ff->nEnd - ff->nStart));
            fprintf(f, "\t");
            for (ff = ali; ff != NULL; ff = ff->right)
                fprintf(f, "%ld,", (long)(ff->nStart - needle));
            fprintf(f, "\t");
            for (ff = ali; ff != NULL; ff = ff->right)
                fprintf(f, "%d,", trans3GenoPos(ff->hStart, tSeq, t3List, FALSE) + chromOffset);
            if (saveSeq)
            {
                /* pslX extension: the raw query and target bytes per block. */
                fputc('\t', f);
                for (ff = ali; ff != NULL; ff = ff->right)
                {
                    mustWrite(f, ff->nStart, ff->nEnd - ff->nStart);
                    fputc(',', f);
                }
                fputc('\t', f);
                for (ff = ali; ff != NULL; ff = ff->right)
                {
                    mustWrite(f, ff->hStart, ff->hEnd - ff->hStart);
                    fputc(',', f);
                }
            }
            fprintf(f, "\n");
            if (ferror(f))
            {
                perror("");
                errAbort("Write error to .psl");
            }
        }
    }
}