void outputOneRa(struct dnaSeq *seq, int start, int end, FILE *f)
/* Output one Ra record to file.
 *
 * The record is a run of "key value" lines followed by a blank line and
 * describes the ORF occupying [start,end) of seq:
 *   - orfName/txName/txSize/cdsStart/cdsEnd/cdsSize identify the ORF.
 *   - gotStart is set when the ORF begins with "atg"; gotEnd when the three
 *     bases just before end pass isStopCodon().
 *   - gotKozak1 is set when the base at start-3 is 'a' or 'g', gotKozak2 when
 *     the base at start+3 is 'g'; gotKozak is their sum (0, 1 or 2).
 *   - upstreamAtgCount/upstreamKozakCount count every "atg" (and every one
 *     that passes isKozak()) beginning before start.
 *   - upstreamSize/upstreamKozakSize are the number of bases in [0,start)
 *     covered by upstream ORFs that end before start.
 * All base comparisons here are against lowercase letters. */
{
    fprintf(f, "orfName %s_%d_%d\n", seq->name, start, end);
    fprintf(f, "txName %s\n", seq->name);
    fprintf(f, "txSize %d\n", seq->size);
    fprintf(f, "cdsStart %d\n", start);
    fprintf(f, "cdsEnd %d\n", end);
    fprintf(f, "cdsSize %d\n", end-start);
    fprintf(f, "gotStart %d\n", startsWith("atg", seq->dna+start));

    /* The stop codon, if any, is the last three bases of the ORF.  With
     * end < 3 there is no room for one, and dna+end-3 would point before
     * the start of the sequence, so report "no stop" rather than read it. */
    boolean gotEnd = FALSE;
    if (end >= 3)
        gotEnd = isStopCodon(seq->dna+end-3);
    fprintf(f, "gotEnd %d\n", gotEnd);

    /* First Kozak position: purine three bases before the start codon. */
    boolean gotKozak1 = FALSE;
    if (start >= 3)
    {
        char c = seq->dna[start-3];
        gotKozak1 = (c == 'a' || c == 'g');
    }
    fprintf(f, "gotKozak1 %d\n", gotKozak1);

    /* Second Kozak position: 'g' immediately after the start codon. */
    boolean gotKozak2 = FALSE;
    if (start+3 < seq->size)
        gotKozak2 = (seq->dna[start+3] == 'g');
    fprintf(f, "gotKozak2 %d\n", gotKozak2);
    fprintf(f, "gotKozak %d\n", gotKozak1 + gotKozak2);

    /* Count up upstream ATG and Kozak */
    struct rbTree *upAtgRanges = rangeTreeNew(), *upKozakRanges = rangeTreeNew();
    int upAtg = 0, upKozak = 0;
    int i;
    for (i=0; i<start; ++i)
    {
        if (!startsWith("atg", seq->dna + i))
            continue;

        /* Every upstream ATG is counted, but only ORFs that finish before
         * our start contribute to the covered-size figures. */
        int orfEnd = findOrfEnd(seq, i);
        boolean endsUpstream = (orfEnd < start);
        if (endsUpstream)
            rangeTreeAdd(upAtgRanges, i, orfEnd);
        ++upAtg;
        if (isKozak(seq->dna, seq->size, i))
        {
            ++upKozak;
            if (endsUpstream)
                rangeTreeAdd(upKozakRanges, i, orfEnd);
        }
    }
    fprintf(f, "upstreamAtgCount %d\n", upAtg);
    fprintf(f, "upstreamKozakCount %d\n", upKozak);
    fprintf(f, "upstreamSize %d\n", rangeTreeOverlapSize(upAtgRanges, 0, start));
    fprintf(f, "upstreamKozakSize %d\n", rangeTreeOverlapSize(upKozakRanges, 0, start));
    fprintf(f, "\n");

    /* Clean up and go home. */
    rangeTreeFree(&upAtgRanges);
    rangeTreeFree(&upKozakRanges);
}
Exemple #2
0
int orfEndInSeq(struct dnaSeq *seq, int start)
/* Return the end of the ORF that begins at start, found by running
 * findOrfEnd() over the sequence's DNA and size. */
{
int orfEnd = findOrfEnd(seq->dna, seq->size, start);
return orfEnd;
}
Exemple #3
0
void fillInArrayFromPair(struct lm *lm, struct mafComp *native, struct mafComp *xeno,
	struct orthoCds *array, int arraySize, int symCount)
/* Figure out the CDS in xeno for each position in native.
 *
 * Walks the symCount alignment columns of the native/xeno pair, records the
 * xeno symbol aligned to each native base in array[].base, then scans the
 * gap-stripped xeno sequence in all three frames and hands each ORF found
 * to applyOrf().  Scratch buffers are allocated from lm.  Aborts via
 * errAbort() if the native text contains a '.'. */
{
char *nativeText = native->text;
char *xenoText = xeno->text;
int nativeSize = arraySize;
int xenoSize = symCount - countChars(xenoText, '-');

/* Two coordinate maps: nToX[native index] -> xeno index, and
 * xToN[xeno index] -> native index.  Each has one spare slot at the end. */
int *nToX, *xToN;
lmAllocArray(lm, nToX, nativeSize+1);
lmAllocArray(lm, xToN, xenoSize+1);

int nIx = 0, xIx = 0;
int col;
for (col = 0; col < symCount; ++col)
    {
    char n = nativeText[col];
    char x = xenoText[col];
    if (n == '.')
        errAbort("Dot in native component %s of maf. Can't handle it.", native->src);
    /* Record the mapping for the current column before advancing either side. */
    nToX[nIx] = xIx;
    xToN[xIx] = nIx;
    if (n != '-')
        {
        array[nIx].base = x;
        ++nIx;
        }
    if (x != '-')
        ++xIx;
    }
assert(xIx == xenoSize);
assert(nIx == nativeSize);

/* Sentinel entries past the last real position simplify later lookups. */
nToX[nativeSize] = xenoSize;
xToN[xenoSize] = nativeSize;

/* Lowercased copy of the xeno text with alignment gaps removed. */
char *xDna = lmCloneString(lm, xenoText);
tolowers(xDna);
stripChar(xDna, '-');

#ifdef DEBUG
uglyf("xToN:");
for (col = 0; col < xenoSize; ++col) uglyf(" %d", xToN[col]);
uglyf("\n");
#endif /* DEBUG */

/* Scan each of the three reading frames for ORFs. */
int lastPos = xenoSize - 3;	/* Last index at which a whole codon can begin. */
int frame;
for (frame = 0; frame < 3; ++frame)
    {
    int frameDnaSize = xenoSize - frame;

    /* The ORF running from the very start of the frame is applied
     * unconditionally; it may not begin with ATG. */
    int orfEnd = findOrfEnd(xDna, frameDnaSize, frame);
    applyOrf(frame, orfEnd, xDna, xToN, array, arraySize);

    /* After that, step codon by codon; each ATG opens a new ORF and the
     * scan resumes where that ORF ends. */
    int pos = orfEnd;
    while (pos <= lastPos)
        {
        if (!startsWith("atg", xDna+pos))
            {
            pos += 3;
            continue;
            }
        orfEnd = findOrfEnd(xDna, frameDnaSize, pos);
        applyOrf(pos, orfEnd, xDna, xToN, array, arraySize);
        pos = orfEnd;
        }
    }

}