Esempio n. 1
0
void wormClipRangeToChrom(char *chrom, int *pStart, int *pEnd)
/* Adjust *pStart and *pEnd in place so that the range they describe lies
 * between 0 and wormChromSize(chrom).  A reversed range is put in order
 * first.  A range that hangs over either end is shifted back inside with
 * its length kept; the start is only cut off at 0 when the range is longer
 * than the chromosome itself. */
{
int limit = wormChromSize(chrom);
int lo = *pStart;
int hi = *pEnd;

/* Put the ends in ascending order. */
if (lo > hi)
    {
    int swap = hi;
    hi = lo;
    lo = swap;
    }

/* Hanging off the left edge: shift right by the overhang. */
if (lo < 0)
    {
    hi -= lo;
    lo = 0;
    }

/* Hanging off the right edge: shift left by the overhang. */
if (hi > limit)
    {
    lo -= hi - limit;
    hi = limit;
    }

/* The left shift can push the start negative when the range is longer
 * than the chromosome; truncate in that case. */
if (lo < 0)
    lo = 0;

*pEnd = hi;
*pStart = lo;
}
Esempio n. 2
0
int main(int argc, char *argv[])
/* editbase - lists bases for which there is evidence of RNA editing.
 *
 * Takes exactly one argument: the name of the output text file.  Walks each
 * chromosome in windows of stepSize bases.  For each window it fetches the
 * DNA (padded by up to extraBases on each side), aligns the cDNAs that
 * overlap the window, feeds the alignments to addNoiseTrack(), and then
 * reports every base whose noise list passes the scoring test below.  Each
 * report is written both to stdout and to the output file.
 * Returns 0 on success; aborts through errAbort() on bad usage or if the
 * output file cannot be closed cleanly. */
{
#define stepSize 10000
#define extraBases 1000
/* One noiseTrack slot per base of the current window; reused across windows
 * via recycleNoiseTrack() at the bottom of the loop. */
static struct noiseTrack noiseTrack[stepSize];
int chromIx;
int chromSize;
int baseOff;
char *chromName;
int dnaStart, dnaEnd;
char *outName;
FILE *out;
struct hash *dupeHash;

if (argc != 2)
    {
    errAbort("editbase - lists bases for which there is evidence of RNA editing\n"
             "usage:\n"
             "      editbase outfile.txt");
    }
dnaUtilOpen();
initVlookup();
outName = argv[1];
out = mustOpen(outName, "w");
printf("Scanning for cDNAs that align more than once.\n");
dupeHash = buildMultiAlignHash();
printf("Loading worm genome\n");
wormLoadNt4Genome(&chrom, &chromCount);
wormChromNames(&chromNames, &chromCount);
for (chromIx = 0; chromIx < chromCount; ++chromIx)
    {
    chromName = chromNames[chromIx];
    printf("Processing chromosome %s\n", chromName);
    chromSize = wormChromSize(chromName);
    for (baseOff = 0; baseOff < chromSize; baseOff += stepSize)
        {
        struct wormFeature *cdnaNamesList, *name;
        struct cdnaAli *caList = NULL, *ca;
        int dnaSize;
        DNA *dna;
        int chunkSize;
        DNA *chunk;
        int i;

        /* Figure out how much DNA to get and get it.  Include some
         * extra around chunk so can align better.  The last window of a
         * chromosome may be shorter than stepSize. */
        chunkSize = chromSize - baseOff;
        if (chunkSize > stepSize) chunkSize = stepSize;
        dnaStart = baseOff - extraBases;
        dnaEnd = baseOff + stepSize + extraBases;
        wormClipRangeToChrom(chromName, &dnaStart, &dnaEnd);
        dnaSize = dnaEnd - dnaStart;
        dna = wormChromPart(chromName, dnaStart, dnaSize);

        /* Get the cDNAs overlapping the window, skipping any that are
         * present in dupeHash. */
        cdnaNamesList = wormCdnasInRange(chromName, baseOff, baseOff + chunkSize);
        for (name = cdnaNamesList; name != NULL; name = name->next)
            {
            if (!hashLookup(dupeHash, name->name) )
                {
                ca = makeCdnaAli(name->name, dna, dnaSize);
                slAddHead(&caList, ca);
                }
            }
        slReverse(&caList);

        /* Add cdnas to noise track.  chunk points at the window's first
         * base inside the padded dna buffer. */
        chunk = dna + baseOff - dnaStart;
        for (ca = caList; ca != NULL; ca = ca->next)
            {
            addNoiseTrack(noiseTrack, chunk, chunkSize, ca);
            }

        /* Step through base by base evaluating noise and reporting it if
         * it's interesting. */
        for (i=0; i<chunkSize; ++i)
            {
            struct noiseTrack *nt = &noiseTrack[i];
            struct noise *noise = nt->noise;
            int noiseCount = slCount(noise);
            if (noiseCount > 1)
                {
                char commonVal;
                int commonCount;
                findCommon(noise, &commonVal, &commonCount);
                /* Require a strict majority of the noise entries to share
                 * commonVal, and that value not to be 'n'. */
                if (commonCount*2 > noiseCount && commonVal != 'n')
                    {
                    double ratio = (double)commonCount/noiseCount;
                    double score;
                    /* Score is (fraction agreeing)^3 * (number agreeing). */
                    ratio = ratio * ratio * ratio;
                    score = ratio * commonCount;
                    if (score >= 4.0)
                        {
                        /* Report the same score on both streams: the value
                         * that was actually tested against the threshold. */
                        fprintf(stdout, "%f %s:%d %c->%c in %d out of %d out of %d %s\n",
                            score, chromName, i+baseOff+1,
                            chunk[i], commonVal, 
                            commonCount, noiseCount, nt->cdnaCount, nt->noise->ca->cdna->srn->name);
                        fprintf(out, "%f %s:%d %c->%c in %d out of %d out of %d %s\n",
                            score, chromName, i+baseOff+1,
                            chunk[i], commonVal, 
                            commonCount, noiseCount, nt->cdnaCount, nt->noise->ca->cdna->srn->name);
                        }
                    }
                }
            }
        freeCdnaAliList(&caList);
        slFreeList(&cdnaNamesList);     
        freez(&dna);
        recycleNoiseTrack(noiseTrack, chunkSize);
        printf("%s %d maxNoise %d\n", chromName, baseOff, slCount(freeNoiseList));
        }
    }
/* Close explicitly so that a failed flush of buffered output (e.g. disk
 * full) is reported instead of silently producing a truncated file. */
if (fclose(out) != 0)
    errAbort("Error closing %s", outName);
return 0;
}