Ejemplo n.º 1
0
boolean wormParseChromRange(char *in, char **retChromId, int *retStart, int *retEnd)
/* Chop up a string representation of a range within a chromosome and put the
 * pieces into the return variables. Return FALSE if it isn't formatted right.
 *
 * in         - text holding exactly three fields (chromosome, start, end)
 *              separated by any of '-', ':', space, tab, CR or LF.  It is
 *              copied to a scratch buffer before being chopped.
 * retChromId - receives the value returned by wormOfficialChromName().
 * retStart,
 * retEnd     - receive the numeric fields after wormClipRangeToChrom() has
 *              been applied to them.
 *
 * The return variables are only written on success.  Input too long for the
 * 128 byte scratch buffer is rejected as badly formatted. */
{
char *words[5];
int wordCount;
char *chromId;
char buf[128];
size_t inLen = strlen(in);

/* strncpy() would leave buf without a terminator when the input fills it,
 * letting chopString() run off the end.  Check the length up front and copy
 * the terminator explicitly instead. */
if (inLen >= sizeof(buf))
    return FALSE;
memcpy(buf, in, inLen + 1);

wordCount = chopString(buf, "- \t\r\n:", words, ArraySize(words));
if (wordCount != 3)
    return FALSE;
chromId = wormOfficialChromName(words[0]);
if (chromId == NULL)
    return FALSE;
/* Cast to unsigned char: handing isdigit() a negative char is undefined. */
if (!isdigit((unsigned char)words[1][0]) || !isdigit((unsigned char)words[2][0]))
    return FALSE;
*retChromId = chromId;
*retStart = atoi(words[1]);
*retEnd = atoi(words[2]);
wormClipRangeToChrom(chromId, retStart, retEnd);
return TRUE;
}
Ejemplo n.º 2
0
void showClump(struct ernaClump *clump, FILE *f)
/* Show detailed alignment for one clump. */
{
int chromStart = clump->start - 1000;
int chromEnd = clump->end + 1000;
int chromSize;
DNA *chromDna;
struct wormFeature *cdnaNameList, *cdnaName;
struct lineAli *laList = NULL, *la;
struct ffAli *ali;
struct dnaSeq *cdna;
boolean rcCdna;
int clumpSize = clump->end - clump->start + 1;
int displaySize = lineSize;
int displayStart = (clump->start+clump->end)/2 - displaySize/2;
int displayEnd = displayStart + displaySize;
int displayDnaOffset;
DNA *displayDna;
struct ernaHit *hit;

/* Get genomic dna and list of all cDNAs in area around clump. */
wormClipRangeToChrom(clump->chrom, &chromStart, &chromEnd);
chromSize = chromEnd - chromStart;
chromDna = wormChromPart(clump->chrom, chromStart, chromSize);
cdnaNameList = wormCdnasInRange(clump->chrom, chromStart, chromEnd);

/* Figure out 60 bases to display alignment around clump. */
wormClipRangeToChrom(clump->chrom, &displayStart, &displayEnd);
displaySize = displayEnd - displayStart;
displayDnaOffset = displayStart - chromStart;
displayDna = chromDna + displayDnaOffset;

/* Make up detailed alignment on each cDNA */
for (cdnaName = cdnaNameList; cdnaName != NULL; cdnaName = cdnaName->next)
    {
    struct wormCdnaInfo info;
    if (!wormCdnaSeq(cdnaName->name, &cdna, &info))
        {
        warn("Couldn't find %s", cdnaName->name);
        continue;
        }
    if (!ffFindEitherStrandN(cdna->dna, cdna->size, chromDna, chromSize, ffCdna, &ali, &rcCdna))
        {
        warn("Couldn't align %s", cdnaName->name);
        continue;
        }
    if (rcCdna)
        reverseComplement(cdna->dna, cdna->size);
    la = makeLineAli(cdnaName->name, ali, chromDna, cdna->dna, displayDnaOffset);
    la->isEmbryo = info.isEmbryonic;
    slAddHead(&laList, la);    
    freeDnaSeq(&cdna);
    ffFreeAli(&ali);
    }

/* Display genomic with upper case at hot spots*/
displayDna[displaySize] = 0;
for (hit = clump->hits; hit != NULL; hit = hit->next)
    {
    int doff = hit->pos - chromStart;
    chromDna[doff] = toupper(chromDna[doff]);
    }
fprintf(f, "%s Genomic\n", displayDna);

/* Display aligned list by sorted score. */
slSort(&laList, cmpLaScore);
for (la = laList; la != NULL; la = la->next)
    {
    if (spaceCount(la->line) != lineSize)
        fprintf(f, "%s %s %s\n", la->line, la->name, (la->isEmbryo ? "emb" : "   "));
    }
/* Clean up. */
slFreeList(&cdnaNameList);
slFreeList(&laList);
freeMem(chromDna);
}
Ejemplo n.º 3
0
int main(int argc, char *argv[])
/* editbase - lists bases for which there is evidence of RNA editing.
 *
 * Usage: editbase outfile.txt
 *
 * Walks every chromosome in steps of stepSize bases.  For each step the cDNAs
 * in range (except those found in the hash from buildMultiAlignHash()) are
 * aligned against the surrounding DNA and added to a per-base noise track.
 * Bases whose most common noise value scores at least 4.0 are reported both
 * on stdout and in the output file.  Returns 0 on success, 1 if the output
 * file could not be closed cleanly. */
{
#define stepSize 10000
#define extraBases 1000
static struct noiseTrack noiseTrack[stepSize];
int chromIx;
int chromSize;
int baseOff;
char *chromName;
int dnaStart, dnaEnd;
char *outName;
FILE *out;
struct hash *dupeHash;

if (argc != 2)
    {
    errAbort("editbase - lists bases for which there is evidence of RNA editing\n"
             "usage:\n"
             "      editbase outfile.txt");
    }
dnaUtilOpen();
initVlookup();
outName = argv[1];
out = mustOpen(outName, "w");
printf("Scanning for cDNAs that align more than once.\n");
dupeHash = buildMultiAlignHash();
printf("Loading worm genome\n");
wormLoadNt4Genome(&chrom, &chromCount);
wormChromNames(&chromNames, &chromCount);
for (chromIx = 0; chromIx < chromCount; ++chromIx)
    {
    chromName = chromNames[chromIx];
    printf("Processing chromosome %s\n", chromName);
    chromSize = wormChromSize(chromName);
    for (baseOff = 0; baseOff < chromSize; baseOff += stepSize)
        {
        struct wormFeature *cdnaNamesList, *name;
        struct cdnaAli *caList = NULL, *ca;
        int dnaSize;
        DNA *dna;
        int chunkSize;
        DNA *chunk;
        int i;

        /* Figure out how much DNA to get and get it.  Include some
         * extra around chunk so can align better.  The last chunk of a
         * chromosome may be shorter than stepSize. */
        chunkSize = chromSize - baseOff;
        if (chunkSize > stepSize) chunkSize = stepSize;
        dnaStart = baseOff - extraBases;
        dnaEnd = baseOff + stepSize + extraBases;
        wormClipRangeToChrom(chromName, &dnaStart, &dnaEnd);
        dnaSize = dnaEnd - dnaStart;
        dna = wormChromPart(chromName, dnaStart, dnaSize);

        /* Get the cDNAs, leaving out any that are present in dupeHash. */
        cdnaNamesList = wormCdnasInRange(chromName, baseOff, baseOff + chunkSize);
        for (name = cdnaNamesList; name != NULL; name = name->next)
            {
            if (!hashLookup(dupeHash, name->name) )
                {
                ca = makeCdnaAli(name->name, dna, dnaSize);
                slAddHead(&caList, ca);
                }
            }
        slReverse(&caList); 
        
        /* Add cdnas to noise track.  chunk points at the base for baseOff
         * inside the padded dna buffer. */
        chunk = dna + baseOff - dnaStart;
        for (ca = caList; ca != NULL; ca = ca->next)
            {
            addNoiseTrack(noiseTrack, chunk, chunkSize, ca);
            }

        /* Step through base by base evaluating noise and reporting it if
         * it's interesting. */
        for (i=0; i<chunkSize; ++i)
            {
            struct noiseTrack *nt = &noiseTrack[i];
            struct noise *noise = nt->noise;
            int noiseCount = slCount(noise);
            if (noiseCount > 1)
                {
                char commonVal;
                int commonCount;
                findCommon(noise, &commonVal, &commonCount);
                /* Only report when the common value is a strict majority
                 * and is not 'n'. */
                if (commonCount*2 > noiseCount && commonVal != 'n')
                    {
                    double ratio = (double)commonCount/noiseCount;
                    double score;
                    /* Score is the cube of the majority fraction times the
                     * number of cDNAs agreeing on the common value. */
                    ratio = ratio * ratio * ratio;
                    score = ratio * commonCount;
                    if (score >= 4.0)
                        {
                        /* Both reports print the score that passed the
                         * threshold; the file used to print a different
                         * value (ratio*ratio*commonCount) than stdout. */
                        fprintf(stdout, "%f %s:%d %c->%c in %d out of %d out of %d %s\n",
                            score, chromName, i+baseOff+1,
                            chunk[i], commonVal, 
                            commonCount, noiseCount, nt->cdnaCount, nt->noise->ca->cdna->srn->name);
                        fprintf(out, "%f %s:%d %c->%c in %d out of %d out of %d %s\n",
                            score, chromName, i+baseOff+1,
                            chunk[i], commonVal, 
                            commonCount, noiseCount, nt->cdnaCount, nt->noise->ca->cdna->srn->name);
                        }
                    }
                }
            }
        freeCdnaAliList(&caList);
        slFreeList(&cdnaNamesList);     
        freez(&dna);
        recycleNoiseTrack(noiseTrack, chunkSize);
        printf("%s %d maxNoise %d\n", chromName, baseOff, slCount(freeNoiseList));
        }
    }
/* Close explicitly so a failed flush of buffered results is noticed rather
 * than silently lost at exit. */
if (fclose(out) != 0)
    {
    fprintf(stderr, "Error writing %s\n", outName);
    return 1;
    }
return 0;
}