Пример #1
0
DNA *wormGetNamelessClusterDna(char *name)
/* Return the DNA covering the chromosome range that wormGeneRange()
 * reports for the nameless cluster 'name'.  Calls errAbort() when the
 * range lookup fails.  The strand is fetched but not used: the DNA is
 * returned exactly as wormChromPart() delivers it. */
{
char *chromName;
char clusterStrand;
int rangeStart, rangeEnd;
int rangeSize;

if (!wormGeneRange(name, &chromName, &clusterStrand, &rangeStart, &rangeEnd))
    errAbort("Can't find %s in database", name);

/* wormChromPart wants a start and a size rather than a start and an end. */
rangeSize = rangeEnd - rangeStart;
return wormChromPart(chromName, rangeStart, rangeSize);
}
Пример #2
0
boolean getWormGeneExonDna(char *name, DNA **retDna)
/* Get the DNA associated with a gene, without introns.
 *
 * name   - gene name; if wormIsGeneName() accepts it, it is first
 *          translated to the first ORF name from wormGeneToOrfNames().
 *          Names starting with "g-" are looked up in the Genie cache,
 *          everything else in the Sanger cache.
 * retDna - on success receives a freshly cloned, zero terminated string
 *          holding the concatenated spans; the caller owns it.
 *
 * Returns FALSE (leaving *retDna untouched) when the gene is not found,
 * TRUE otherwise. */
{
struct gdfGene *g;
struct slName *syn = NULL;
long lstart, lend;
int start, end;
int dnaSize;
DNA *dna;
int i;
struct gdfDataPoint *pt;
struct wormGdfCache *gdfCache;
struct dyString *dy;

/* Translate biologist type name to cosmid.N name */
if (wormIsGeneName(name))
    {
    syn = wormGeneToOrfNames(name);
    if (syn != NULL)
        name = syn->name;
    }
/* NOTE(review): the list in 'syn' is never released in this function.
 * Presumably the caller of wormGeneToOrfNames() owns it - confirm, and
 * confirm that the gdfGene does not keep a pointer to the name, before
 * adding a free here. */
if (strncmp(name, "g-", 2) == 0)
    gdfCache = &wormGenieGdfCache;
else
    gdfCache = &wormSangerGdfCache;
if ((g = wormGetSomeGdfGene(name, gdfCache)) == NULL)
    return FALSE;
gdfGeneExtents(g, &lstart, &lend);
start = lstart;
end = lend;
/*wormClipRangeToChrom(chromIds[g->chromIx], &start, &end);*/
dnaSize = end-start;
dna = wormChromPart(chromIds[g->chromIx], start, dnaSize);

/* Make the gene's coordinates relative to the start of 'dna'. */
gdfOffsetGene(g, -start);
if (g->strand == '-')
    {
    reverseComplement(dna, dnaSize);
    gdfRcGene(g, dnaSize);
    }

/* The string buffer is only created once we know we will get this far,
 * so the early "not found" return above cannot leak it. */
dy = newDyString(1000);
pt = g->dataPoints;
/* Data points are consumed in pairs: pt[i].start begins a span and
 * pt[i+1].start ends it.  Requiring i+1 to be in range keeps a stray
 * unpaired trailing point from being read past the end of the array. */
for (i=0; i+1<g->dataCount; i += 2)
    {
    dyStringAppendN(dy, (dna+pt[i].start), (pt[i+1].start - pt[i].start));
    }
*retDna = cloneString(dy->string);
dyStringFree(&dy);
/* The genomic buffer has been copied into the result; release it. */
freeMem(dna);
gdfFreeGene(g);
return TRUE;
}
Пример #3
0
boolean getWormGeneDna(char *name, DNA **retDna, boolean upcExons)
/* Fetch the genomic DNA spanning a gene.
 *
 * name     - gene name; when wormIsGeneName() accepts it, the first ORF
 *            name from wormGeneToOrfNames() is used instead.  A "g-"
 *            prefix selects the Genie cache, anything else the Sanger one.
 * retDna   - on success receives the buffer from wormChromPart(),
 *            reverse complemented for '-' strand genes.
 * upcExons - when set, each span between a pair of data points is
 *            converted to upper case.
 *
 * Returns FALSE if the gene is not in the chosen cache, TRUE otherwise. */
{
struct slName *orfNames = NULL;
struct wormGdfCache *cache;
struct gdfGene *gene;
long extentStart, extentEnd;
int geneStart, geneEnd;
int geneSize;
DNA *geneDna;

/* Map a biologist style name onto its cosmid.N style ORF name. */
if (wormIsGeneName(name))
    {
    orfNames = wormGeneToOrfNames(name);
    if (orfNames != NULL)
        name = orfNames->name;
    }

cache = (strncmp(name, "g-", 2) == 0) ? &wormGenieGdfCache : &wormSangerGdfCache;
gene = wormGetSomeGdfGene(name, cache);
if (gene == NULL)
    return FALSE;

gdfGeneExtents(gene, &extentStart, &extentEnd);
geneStart = extentStart;
geneEnd = extentEnd;
/* wormClipRangeToChrom(chromIds[gene->chromIx], &geneStart, &geneEnd); */
geneSize = geneEnd - geneStart;
geneDna = wormChromPart(chromIds[gene->chromIx], geneStart, geneSize);
*retDna = geneDna;

/* Shift gene coordinates so they index directly into geneDna. */
gdfOffsetGene(gene, -geneStart);
if (gene->strand == '-')
    {
    reverseComplement(geneDna, geneSize);
    gdfRcGene(gene, geneSize);
    }

if (upcExons)
    {
    struct gdfDataPoint *points = gene->dataPoints;
    int ix = 0;
    /* Points come in pairs: even index starts a span, odd index ends it. */
    while (ix < gene->dataCount)
        {
        toUpperN(geneDna + points[ix].start, points[ix+1].start - points[ix].start);
        ix += 2;
        }
    }

gdfFreeGene(gene);
return TRUE;
}
Пример #4
0
DNA *wormChromPartExonsUpper(char *chromId, int start, int size)
/* Return 'size' bases of chromosome 'chromId' beginning at 'start', with
 * the exons of every named gene in that range upper cased by
 * gdfUpcExons().  Features for which wormIsNamelessCluster() is true are
 * skipped. */
{
DNA *partDna = wormChromPart(chromId, start, size);
struct wormFeature *featureList = wormGenesInRange(chromId, start, start+size);
struct wormFeature *cur = featureList;

while (cur != NULL)
    {
    if (!wormIsNamelessCluster(cur->name))
        {
        struct gdfGene *gdf = wormGetGdfGene(cur->name);
        gdfUpcExons(gdf, cur->start, partDna, size, start);
        gdfFreeGene(gdf);
        }
    cur = cur->next;
    }

slFreeList(&featureList);
return partDna;
}
Пример #5
0
void doMiddle()
/* Write the body of the page for the sequence named by the "geneName"
 * CGI variable.
 *
 * CGI variables read: geneName, intronsLowerCase, intronsParenthesized,
 * startRange, endRange, hiliteNear, litLink, strand.
 *
 * The name is resolved, in order, as a nameless cluster or chromosome
 * range, a gene, a cDNA, and finally (as a fallback) a chromosome range
 * again.  For genes with "litLink" set, literature links and a protein
 * translation are also written.  All output goes to stdout.
 *
 * NOTE(review): seqName and values derived from it come straight from
 * the CGI request and are written into HTML and URLs without escaping.
 * They should be HTML/URL encoded before being printed; no encoder for
 * that purpose is visible from this function, so it is only flagged. */
{
char *seqName;
boolean intronsLowerCase = TRUE;
boolean intronsParenthesized = FALSE;
boolean hiliteNear = FALSE;
int startRange = 0;
int endRange = 0;
boolean gotRange = FALSE;
struct dnaSeq *cdnaSeq;
boolean isChromRange = FALSE;
DNA *dna;
char *translation = NULL;

seqName = cgiString("geneName");
seqName = trimSpaces(seqName);
if (cgiVarExists("intronsLowerCase"))
    intronsLowerCase = cgiBoolean("intronsLowerCase");
if (cgiVarExists("intronsParenthesized"))
    intronsParenthesized = cgiBoolean("intronsParenthesized");
if (cgiVarExists("startRange") && cgiVarExists("endRange" ))
    {
    startRange = cgiInt("startRange");
    endRange = cgiInt("endRange");
    gotRange = TRUE;
    }
if (cgiVarExists("hiliteNear"))
    {
    hiliteNear = TRUE;
    }
fprintf(stdout, "<P><TT>\n");

/* The logic here is a little complex to optimize speed.
 * If we can decide what type of thing the name refers to by
 * simply looking at the name we do.  Otherwise we have to
 * search the database in various ways until we get a hit. */
if (wormIsNamelessCluster(seqName))
    {
    isChromRange = TRUE;
    }
else if (wormIsChromRange(seqName))
    {
    isChromRange = TRUE;
    }
else if (getWormGeneDna(seqName, &dna, TRUE))
    {
    if (cgiBoolean("litLink"))
        {
        char nameBuf[64];
        char *geneName = NULL;
        char *productName = NULL;
        char *coding;
        int transSize;
        struct wormCdnaInfo info;

        printf("<H3>Information and Links for %s</H3>\n", seqName);
        if (wormInfoForGene(seqName, &info))
            {
            if (info.description)
                printf("<P>%s</P>\n", info.description);
            geneName = info.gene;
            productName = info.product;
            }
        else
            {
            if (wormIsGeneName(seqName))
                geneName = seqName;
            else if (wormGeneForOrf(seqName, nameBuf, sizeof(nameBuf)))
                geneName = nameBuf;
            }
        /* Translate the upper case (exon) bases only.  The buffer is
         * allocated with exactly the capacity that is passed to
         * dnaTranslateSome(), so the two can never disagree. */
        coding = cloneUpperOnly(dna);
        transSize = 1 + (strlen(coding)+2)/3;
        translation = needMem(transSize);
        dnaTranslateSome(coding, translation, transSize);
        freez(&coding);

        if (geneName)
            {
            printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?form=4&db=m"
                    "&term=C+elegans+%s&dispmax=50&relentrezdate=No+Limit\">", geneName);
            printf("PubMed search on gene: </A>%s<BR>\n", geneName);
            }
        if (productName)
            {
            char *encoded = cgiEncode(productName);
            printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?form=4&db=m"
                    "&term=%s&dispmax=50&relentrezdate=No+Limit\">", encoded);
            printf("PubMed search on product:</A> %s<BR>\n", productName);
            freeMem(encoded);
            }
        /* Process name to get rid of isoform letter for Proteome.
         * The copies are bounded: seqName is request data of arbitrary
         * length and nameBuf is a fixed 64 byte buffer. */
        if (geneName)
            {
            /* geneName may already BE nameBuf (filled by wormGeneForOrf
             * above); copying a buffer onto itself is undefined, and
             * unnecessary. */
            if (geneName != nameBuf)
                snprintf(nameBuf, sizeof(nameBuf), "%s", geneName);
            }
        else
            {
            snprintf(nameBuf, sizeof(nameBuf), "%s", seqName);
#ifdef NEVER
            /* Sometimes Proteome requires the letter after the orf name
             * in alt-spliced cases, sometimes it can't handle it.... */
            nameLen = strlen(nameBuf);
            if (wormIsOrfName(nameBuf) && isalpha(nameBuf[nameLen-1]))
                {
                char *dotPos = strrchr(nameBuf, '.');
                if (dotPos != NULL && isdigit(dotPos[1]))
                    nameBuf[nameLen-1] = 0;
                }
#endif /* NEVER */
            }
        printf("<A HREF=\"http://www.wormbase.org/db/seq/sequence?name=%s;class=Sequence\">", seqName);
        printf("WormBase link on:</A> %s<BR>\n", seqName);
        printf("<A HREF=\"http://www.proteome.com/databases/WormPD/reports/%s.html\">", nameBuf);
        printf("Proteome link on:</A> %s<BR>\n<BR>\n", nameBuf);


        printf("<A HREF=#DNA>Genomic DNA Sequence</A><BR>\n");
        if (hiliteNear)
            printf("<A HREF=\"#CLICKED\">Shortcut to where you clicked in gene</A><BR>");
        printf("<A HREF=#protein>Translated Protein Sequence</A><BR>\n");
        htmlHorizontalLine();
        printf("<A NAME=DNA></A>");
        printf("<H3>%s Genomic DNA sequence</H3>", seqName);
        }
    if (!intronsLowerCase)
        tolowers(dna);
    if (hiliteNear)
        {
        if (!gotRange)
            {
            /* No explicit range: highlight a small window centered on
             * the fractional position given by "hiliteNear". */
            double nearPos = cgiDouble("hiliteNear");
            int rad = 5;
            int dnaSize = strlen(dna);
            long mid = (int)(dnaSize * nearPos);
            startRange = mid - rad;
            if (startRange < 0) startRange = 0;
            endRange = mid + rad;
            if (endRange >= dnaSize) endRange = dnaSize - 1;
            }
        }
    outputSeq(dna, strlen(dna), hiliteNear, startRange, endRange, stdout);
    freez(&dna);
    }
else if (wormCdnaSeq(seqName, &cdnaSeq, NULL))
    {
    /* NOTE(review): cdnaSeq is not released after use; confirm the
     * matching free routine and add it here. */
    outputSeq(cdnaSeq->dna, cdnaSeq->size, FALSE, 0, 0, stdout);
    }
else
    {
    isChromRange = TRUE;
    }
if (isChromRange)
    {
    char *chromId;
    int start, end;
    char strand = '+';
    int size;

    if (!wormGeneRange(seqName, &chromId, &strand, &start, &end))
        errAbort("Can't find %s",seqName);
    size = end - start;
    if (intronsLowerCase)
        dna = wormChromPartExonsUpper(chromId, start, size);
    else
        {
        dna = wormChromPart(chromId, start, size);
        touppers(dna);
        }
    /* An explicit "strand" CGI variable overrides the looked-up strand. */
    if (cgiVarExists("strand"))
        strand = cgiString("strand")[0];
    if (strand == '-')
        reverseComplement(dna, size);
    outputSeq(dna, size, FALSE, 0, 0, stdout);
    /* Release the chromosome buffer, as the gene branch does. */
    freez(&dna);
    }
if (translation != NULL)
    {
    htmlHorizontalLine();
    printf("<A NAME=protein></A>");
    printf("<H3>Translated Protein of %s</H3>\n", seqName);
    outputSeq(translation, strlen(translation), FALSE, 0, 0, stdout);
    freez(&translation);
    }
fprintf(stdout, "</TT></P>\n");

}
Пример #6
0
void showClump(struct ernaClump *clump, FILE *f)
/* Show detailed alignment for one clump. */
{
int chromStart = clump->start - 1000;
int chromEnd = clump->end + 1000;
int chromSize;
DNA *chromDna;
struct wormFeature *cdnaNameList, *cdnaName;
struct lineAli *laList = NULL, *la;
struct ffAli *ali;
struct dnaSeq *cdna;
boolean rcCdna;
int clumpSize = clump->end - clump->start + 1;
int displaySize = lineSize;
int displayStart = (clump->start+clump->end)/2 - displaySize/2;
int displayEnd = displayStart + displaySize;
int displayDnaOffset;
DNA *displayDna;
struct ernaHit *hit;

/* Get genomic dna and list of all cDNAs in area around clump. */
wormClipRangeToChrom(clump->chrom, &chromStart, &chromEnd);
chromSize = chromEnd - chromStart;
chromDna = wormChromPart(clump->chrom, chromStart, chromSize);
cdnaNameList = wormCdnasInRange(clump->chrom, chromStart, chromEnd);

/* Figure out 60 bases to display alignment around clump. */
wormClipRangeToChrom(clump->chrom, &displayStart, &displayEnd);
displaySize = displayEnd - displayStart;
displayDnaOffset = displayStart - chromStart;
displayDna = chromDna + displayDnaOffset;

/* Make up detailed alignment on each cDNA */
for (cdnaName = cdnaNameList; cdnaName != NULL; cdnaName = cdnaName->next)
    {
    struct wormCdnaInfo info;
    if (!wormCdnaSeq(cdnaName->name, &cdna, &info))
        {
        warn("Couldn't find %s", cdnaName->name);
        continue;
        }
    if (!ffFindEitherStrandN(cdna->dna, cdna->size, chromDna, chromSize, ffCdna, &ali, &rcCdna))
        {
        warn("Couldn't align %s", cdnaName->name);
        continue;
        }
    if (rcCdna)
        reverseComplement(cdna->dna, cdna->size);
    la = makeLineAli(cdnaName->name, ali, chromDna, cdna->dna, displayDnaOffset);
    la->isEmbryo = info.isEmbryonic;
    slAddHead(&laList, la);    
    freeDnaSeq(&cdna);
    ffFreeAli(&ali);
    }

/* Display genomic with upper case at hot spots*/
displayDna[displaySize] = 0;
for (hit = clump->hits; hit != NULL; hit = hit->next)
    {
    int doff = hit->pos - chromStart;
    chromDna[doff] = toupper(chromDna[doff]);
    }
fprintf(f, "%s Genomic\n", displayDna);

/* Display aligned list by sorted score. */
slSort(&laList, cmpLaScore);
for (la = laList; la != NULL; la = la->next)
    {
    if (spaceCount(la->line) != lineSize)
        fprintf(f, "%s %s %s\n", la->line, la->name, (la->isEmbryo ? "emb" : "   "));
    }
/* Clean up. */
slFreeList(&cdnaNameList);
slFreeList(&laList);
freeMem(chromDna);
}
Пример #7
0
int main(int argc, char *argv[])
/* editbase - lists bases for which there is evidence of RNA editing.
 *
 * Takes one argument, the output file name.  Each chromosome is walked
 * in chunks of stepSize bases.  For every chunk the cDNAs in range that
 * are not in the multiple-alignment hash are aligned, their mismatches
 * are accumulated in noiseTrack, and each base whose score reaches 4.0
 * is reported both on stdout and in the output file. */
{
#define stepSize 10000
#define extraBases 1000
static struct noiseTrack noiseTrack[stepSize];
int chromIx;
int chromSize;
int baseOff;
char *chromName;
int dnaStart, dnaEnd;
char *outName;
FILE *out;
struct hash *dupeHash;

if (argc != 2)
    {
    errAbort("editbase - lists bases for which there is evidence of RNA editing\n"
             "usage:\n"
             "      editbase outfile.txt");
    }
dnaUtilOpen();
initVlookup();
outName = argv[1];
out = mustOpen(outName, "w");
printf("Scanning for cDNAs that align more than once.\n");
dupeHash = buildMultiAlignHash();
printf("Loading worm genome\n");
wormLoadNt4Genome(&chrom, &chromCount);
wormChromNames(&chromNames, &chromCount);
for (chromIx = 0; chromIx < chromCount; ++chromIx)
    {
    chromName = chromNames[chromIx];
    printf("Processing chromosome %s\n", chromName);
    chromSize = wormChromSize(chromName);
    for (baseOff = 0; baseOff < chromSize; baseOff += stepSize)
        {
        struct wormFeature *cdnaNamesList, *name;
        struct cdnaAli *caList = NULL, *ca;
        int dnaSize;
        DNA *dna;
        int chunkSize;
        DNA *chunk;
        int i;

        /* Figure out how much DNA to get and get it.  Include some
         * extra around chunk so can align better. */
        chunkSize = chromSize - baseOff;
        if (chunkSize > stepSize) chunkSize = stepSize;
        dnaStart = baseOff - extraBases;
        dnaEnd = baseOff + stepSize + extraBases;
        wormClipRangeToChrom(chromName, &dnaStart, &dnaEnd);
        dnaSize = dnaEnd - dnaStart;
        dna = wormChromPart(chromName, dnaStart, dnaSize);

        /* Get the cDNAs, skipping any present in the duplicate hash. */
        cdnaNamesList = wormCdnasInRange(chromName, baseOff, baseOff + chunkSize);
        for (name = cdnaNamesList; name != NULL; name = name->next)
            {
            if (!hashLookup(dupeHash, name->name) )
                {
                ca = makeCdnaAli(name->name, dna, dnaSize);
                slAddHead(&caList, ca);
                }
            }
        slReverse(&caList);

        /* Add cdnas to noise track.  'chunk' points at the base for
         * baseOff inside the padded dna buffer. */
        chunk = dna + baseOff - dnaStart;
        for (ca = caList; ca != NULL; ca = ca->next)
            {
            addNoiseTrack(noiseTrack, chunk, chunkSize, ca);
            }

        /* Step through base by base evaluating noise and reporting it if
         * it's interesting. */
        for (i=0; i<chunkSize; ++i)
            {
            struct noiseTrack *nt = &noiseTrack[i];
            struct noise *noise = nt->noise;
            int noiseCount = slCount(noise);
            if (noiseCount > 1)
                {
                char commonVal;
                int commonCount;
                findCommon(noise, &commonVal, &commonCount);
                /* Only a strict majority that is not 'n' is considered. */
                if (commonCount*2 > noiseCount && commonVal != 'n')
                    {
                    double ratio = (double)commonCount/noiseCount;
                    double score;
                    ratio = ratio * ratio * ratio;
                    score = ratio * commonCount;
                    if (score >= 4.0)
                        {
                        /* Both destinations report the same score that
                         * was tested against the threshold above, so a
                         * record reads identically on stdout and in the
                         * output file. */
                        fprintf(stdout, "%f %s:%d %c->%c in %d out of %d out of %d %s\n",
                            score, chromName, i+baseOff+1,
                            chunk[i], commonVal,
                            commonCount, noiseCount, nt->cdnaCount, nt->noise->ca->cdna->srn->name);
                        fprintf(out, "%f %s:%d %c->%c in %d out of %d out of %d %s\n",
                            score, chromName, i+baseOff+1,
                            chunk[i], commonVal,
                            commonCount, noiseCount, nt->cdnaCount, nt->noise->ca->cdna->srn->name);
                        }
                    }
                }
            }
        freeCdnaAliList(&caList);
        slFreeList(&cdnaNamesList);
        freez(&dna);
        recycleNoiseTrack(noiseTrack, chunkSize);
        printf("%s %d maxNoise %d\n", chromName, baseOff, slCount(freeNoiseList));
        }
    }
/* Flush and close the report so buffered write errors are not lost. */
if (fclose(out) != 0)
    errAbort("Error closing %s", outName);
return 0;
}