Esempio n. 1
0
int main(int argc, char *argv[])
/* Entry point.  The command line is first handed to cgiSpoof() so that
 * options can be read through the cgi routines; exactly two further
 * arguments must remain and are passed on to pslUnpile(). */
{
char *firstArg, *secondArg;

cgiSpoof(&argc, argv);
if (argc != 3)
    {
    usage();
    }

/* Giving "query" turns doTarget off. */
if (cgiBoolean("query"))
    doTarget = 0;
else
    doTarget = 1;

/* Either spelling of the flag sets noHead. */
if (cgiBoolean("nohead") || cgiBoolean("noHead"))
    noHead = 1;
else
    noHead = 0;

/* The pile limits keep their current values unless overridden. */
maxPile = cgiOptionalInt("maxPile", maxPile);
minPile = cgiOptionalInt("minPile", minPile);

firstArg = argv[1];
secondArg = argv[2];
pslUnpile(firstArg, secondArg);
return 0;
}
int main(int argc, char *argv[])
/* Entry point.  Reads the option variables through the cgi routines and
 * passes every argument after the program name to catDir(). */
{
int restCount;
char **restArgs;

cgiSpoof(&argc, argv);
if (argc < 2)
    {
    usage();
    }

/* Options are stored in file-level variables. */
recurse = cgiBoolean("r");
suffix = cgiOptionalString("suffix");
wildCard = cgiOptionalString("wild");
nonz = cgiBoolean("nonz");

/* Skip argv[0]. */
restCount = argc - 1;
restArgs = argv + 1;
catDir(restCount, restArgs);
return 0;
}
int main(int argc, char *argv[])
/* Entry point.  Options are read first (string options keep their
 * current value when absent), then the argument count is checked and
 * the two remaining arguments are passed to autoXml(). */
{
char *firstArg, *secondArg;

cgiSpoof(&argc, argv);

/* String options, defaulting to whatever is already set. */
textField = cgiUsualString("textField", textField);
fileComment = cgiUsualString("comment", fileComment);

/* Flag options. */
picky = cgiBoolean("picky");
makeMain = cgiBoolean("main");
positiveOnly = cgiBoolean("positive");

if (argc != 3)
    {
    usage();
    }
firstArg = argv[1];
secondArg = argv[2];
autoXml(firstArg, secondArg);
return 0;
}
void checkArguments() 
/** Read the request parameters from the cgi variables into the
    file-level variables and validate them.  Nothing is validated when
    hgTest is set.  Aborts the page through webAbort() on an inverted
    range, or on missing inputs when the page called itself; appends a
    warning when the source and destination genomes are the same. */ 
{
hgTest = cgiBoolean("hgTest");
numTests = cgiOptionalInt("numTests",0);
origDb = cgiOptionalString("origDb");
origGenome = cgiOptionalString("origGenome");
newGenome = cgiOptionalString("newGenome");
position = cgiOptionalString("position");
chrom = cgiOptionalString("chrom");
chromStart = cgiOptionalInt("chromStart", -1);
chromEnd = cgiOptionalInt("chromEnd", -1);
calledSelf = cgiBoolean("calledSelf");

/* if we're testing we don't need to worry about UI errors */
if(hgTest)
    return;

/* parse the position string and make sure that it makes sense.  A
 * non-empty position overrides chrom/chromStart/chromEnd.  The clone is
 * deliberately not freed here: NOTE(review): parsePosition presumably
 * leaves chrom pointing into it -- confirm. */
if (position != NULL && position[0] != 0)
    {
    parsePosition(cloneString(position), &chrom, &chromStart, &chromEnd);
    }
if (chromStart > chromEnd)
    { 
    webAbort("Error:", "Start of range is greater than end. %d > %d", chromStart, chromEnd);
    }

/* convert the genomes requested to hgN format */
if(origGenome != NULL)
    origGenome = ccFreezeDbConversion(NULL, origGenome, organism);
if(newGenome != NULL) 
    newGenome = ccFreezeDbConversion(NULL, newGenome, organism);

/* make sure that we've got valid arguments; only enforced when the
 * page was submitted to itself */
if((newGenome == NULL || origGenome == NULL || chrom == NULL || chromStart == -1 || chromEnd == -1) && (calledSelf)) 
    webAbort("Error:", "Missing some inputs.");

/* Both genomes must be present before they are compared: when the page
 * did not call itself the check above lets a NULL newGenome through. */
if(origGenome != NULL && newGenome != NULL && sameString(origGenome, newGenome))
    {
    struct dyString *warning = newDyString(1024);
    dyStringPrintf(warning, "Did you really want to convert from %s to %s (the same genome)?", 
                   ccFreezeDbConversion(origGenome, NULL, organism),
                   ccFreezeDbConversion(newGenome, NULL, organism));
    appendWarningMsg(warning->string);
    dyStringFree(&warning);
    }
}
Esempio n. 5
0
void outputSeq(DNA *dna, int dnaSize,
	boolean hiliteRange, long startRange, long endRange,
	FILE *out)
/* Write out sequence through the dfm formatter.
 *   dna, dnaSize          - sequence and its length.
 *   hiliteRange           - passed to initDfm together with the range.
 *   startRange, endRange  - highlight range in sequence coordinates.
 *   out                   - destination stream.
 * When the "translate" cgi variable is set the sequence is translated
 * codon by codon first, starting after the optional 1-based "utr5"
 * offset and stopping at the first codon for which lookupCodon()
 * returns 0; the highlight range is converted to protein coordinates. */
{
struct dfm dfm;
int i;
char *seq = dna;
int size = dnaSize;
char *protBuf = NULL;	/* Translation buffer, freed before returning. */

if (cgiBoolean("translate"))
    {
    int utr5 = 0;
    int codingSize, maxProtSize;
    char *prot;
    if (cgiVarExists("utr5"))
        utr5 = cgiInt("utr5")-1;
    /* Keep the start offset inside the sequence; otherwise the codon
     * loop below would read before or beyond the dna buffer. */
    if (utr5 < 0)
        utr5 = 0;
    if (utr5 > dnaSize)
        utr5 = dnaSize;
    /* Only the bases after the skipped prefix can yield codons. */
    codingSize = dnaSize - utr5;
    maxProtSize = (codingSize+2)/3;
    protBuf = needMem(maxProtSize + 1);
    prot = protBuf;
    startRange -= utr5;
    endRange -= utr5;
    startRange /= 3;
    endRange /= 3;
    dna += utr5;
    seq = protBuf;
    /* NOTE(review): a trailing partial codon is still handed to
     * lookupCodon(), as before; this relies on dna being
     * zero-terminated -- confirm. */
    for (size = 0; size < maxProtSize; ++size)
        {
        if ((*prot++ = lookupCodon(dna)) == 0)
            break;
        dna += 3;
        }
    *prot = 0;
    }
initDfm(&dfm, 10, 50, TRUE, hiliteRange, startRange, endRange, out);
for (i=0; i<size; ++i)
    dfmOut(&dfm, seq[i]);
if (protBuf != NULL)
    freeMem(protBuf);
}
Esempio n. 6
0
void initRegPlotOutput(struct dyString *script, char *fileName)
/* Append to script the command that opens the output device for
 * fileName: a pdf device when the "pdf" cgi variable is set, a bitmap
 * device otherwise. */
{
if(!cgiBoolean("pdf"))
    {
    dyStringPrintf(script, "bitmap('%s', width=3, height=3, res=200);\n", fileName);
    return;
    }
dyStringPrintf(script, "pdf(file='%s', width=6, height=6);\n", fileName);
}
Esempio n. 7
0
void makePlotLink(char *type, char *fileName)
/* Append to the html buffer either an inline image for fileName or,
 * when the "pdf" cgi variable is set, a link to it labelled with type. */
{
if(!cgiBoolean("pdf"))
    {
    dyStringPrintf(html, "<img src='%s'><br>\n", fileName);
    return;
    }
dyStringPrintf(html, "<a href='%s'>%s</a><br>\n", fileName, type);
}
Esempio n. 8
0
int main(int argc, char *argv[])
/* Entry point.  Reads the "zeroOk" flag, then returns the result of
 * endsInLf() run over every argument after the program name. */
{
int restCount;
char **restArgs;

cgiSpoof(&argc, argv);
zeroOk = cgiBoolean("zeroOk");
if (argc < 2)
    {
    usage();
    }
restCount = argc - 1;
restArgs = argv + 1;
return endsInLf(restCount, restArgs);
}
Esempio n. 9
0
int main(int argc, char *argv[])
/* Entry point.  Reads the "ver" flag and passes every argument after
 * the program name to agpCloneList(). */
{
int restCount;
char **restArgs;

cgiSpoof(&argc, argv);
ver = cgiBoolean("ver");
if (argc < 2)
    {
    usage();
    }
restCount = argc - 1;
restArgs = argv + 1;
agpCloneList(restCount, restArgs);
return 0;
}
int main(int argc, char *argv[])
/* Entry point.  Reads the "stretch" flag, requires exactly three
 * further arguments and passes them, together with the optional
 * "altFile" value, to uniqSize(). */
{
char *altFile;

cgiSpoof(&argc, argv);
stretch = cgiBoolean("stretch");
if (argc != 4)
    {
    usage();
    }
/* altFile may be NULL when the option was not given. */
altFile = cgiOptionalString("altFile");
uniqSize(argv[1], argv[2], argv[3], altFile);
return 0;
}
void doMiddle()
/* Write middle part of .html: a heading followed by the part of the
 * xeno alignment of "query" that falls inside the "target" window.
 * Reads the cgi variables qOrganism, tOrganism, query, target, strand,
 * symbols and (optionally) clickPos.  Aborts through errAbort() when
 * the target cannot be parsed or does not overlap the alignment. */
{
DNA *targetDna;
char *chrom;
int tStart, tEnd;
struct xaAli *xa;
int bothStart, bothEnd;
char cbCosmidName[256];
char *s;

/* Get input variables from CGI. */
char *qOrganism = cgiString("qOrganism");
char *tOrganism = cgiString("tOrganism");
char *query = cgiString("query");
char *target = cgiString("target");
char *strandString = cgiString("strand");
char strand = strandString[0];
boolean showSym = cgiBoolean("symbols");

/* clickPos is not used below, but it is still read when present.
 * NOTE(review): presumably kept so that a malformed value is treated as
 * before -- confirm whether the lookup can be dropped. */
if (cgiVarExists("clickPos"))
    cgiDouble("clickPos");

/* Copy the query name without its last '.'-suffix.  The copy is
 * bounded because the value comes straight from the request. */
snprintf(cbCosmidName, sizeof(cbCosmidName), "%s", query);
if ((s = strrchr(cbCosmidName, '.')) != NULL)
    *s = 0;


/* Get xaAli. */
xa = getOneXaAli(qOrganism, query);

printf("<H2>Alignment of <I>C. briggsae</I> %s:%d-%d and <I>C. elegans</I> %s</H2>\n",
    cbCosmidName, xa->qStart, xa->qEnd, target);


htmlParagraph("<I>C. briggsae</I> appears on top. Coding regions in <I>C. elegans</I> are in upper case.");


/* Get display window. */
if (!wormParseChromRange(target, &chrom, &tStart, &tEnd))
    errAbort("Target %s isn't formatted correctly", target);

/* Figure out intersection of display window and xeno-alignment */
bothStart = max(xa->tStart, tStart);
bothEnd = min(xa->tEnd, tEnd);
/* Without an overlap the length below would be negative. */
if (bothEnd < bothStart)
    errAbort("Target %s doesn't overlap the alignment of %s", target, query);

/* Get upper-cased-exon target DNA. */
targetDna = wormChromPartExonsUpper(chrom, bothStart, bothEnd - bothStart);
upcCorresponding(targetDna, bothEnd - bothStart, xa->tSym, bothStart - xa->tStart);


printf("<TT><PRE>");
showTargetRange(xa, bothStart - xa->tStart, bothEnd-bothStart, strand, showSym);
/* Close the tags in the reverse order of opening. */
printf("</PRE></TT>");
}
Esempio n. 12
0
int main(int argc, char *argv[])
/* Entry point.  Reads the "test" and "dots" options, requires exactly
 * eight further arguments, stores the last one in proteinDB and passes
 * the first seven to hgRefSeqMrna(). */
{
cgiSpoof(&argc, argv);

clTest = cgiBoolean("test");
clDots = cgiOptionalInt("dots", clDots);	/* Keeps its value when absent. */

if (argc != 9)
    {
    usage();
    }
proteinDB = argv[8];
hgRefSeqMrna(argv[1], argv[2], argv[3], argv[4],
             argv[5], argv[6], argv[7]);
return 0;
}
Esempio n. 13
0
int main(int argc, char *argv[])
/* Entry point.  With no arguments left after cgiSpoof() only usage()
 * is called; otherwise the "suffix" and "doAll" options are read and
 * avgTranscriptomeExps() is run. */
{
cgiSpoof(&argc, argv);
if(argc == 1)
    {
    usage();
    return 0;
    }
suffix = cgiUsualString("suffix", "pairs.sample.norm");
doAll = cgiBoolean("doAll");
avgTranscriptomeExps();
return 0;
}
boolean bedPassFilters(struct bed *bed, struct altGraphX *ag, int cassetteEdge)
/* Run passFilter() on every block of bed whose expIds entry differs
 * from cassetteEdge and combine the results with a bitwise and.  Every
 * such block is checked even after one has failed.  Filter settings
 * come from the cgi variables minFlankingNum (default 2),
 * minFlankingSize (default 0) and mrnaFilter. */
{
int minFlankingNum = cgiUsualInt("minFlankingNum", 2);
int minFlankingSize = cgiUsualInt("minFlankingSize", 0);
boolean mrnaFilter = cgiBoolean("mrnaFilter");
boolean passed = TRUE;
int blockIx = 0;

while (blockIx < bed->blockCount)
    {
    if(bed->expIds[blockIx] != cassetteEdge)
        {
        passed = passed & passFilter(bed, blockIx, ag, minFlankingNum, minFlankingSize, mrnaFilter);
        }
    blockIx++;
    }
return passed;
}
Esempio n. 15
0
void doMiddle(struct cart *theCart)
/* Write header and body of html page: the sequence entry form when no
 * sequence is stored in the cart or the organism was just changed,
 * otherwise the result of blatSeq() on the stored sequence. */
{
char *db, *organism;
char *userSeq;
char *oldDb;
boolean clearUserSeq = cgiBoolean("Clear");

cart = theCart;
dnaUtilOpen();

/* On an organism change the "db" cgi variable is replaced by whatever
 * hDefaultDbForGenome() returns for "org". */
orgChange = sameOk(cgiOptionalString("changeInfo"),"orgChange");
if (orgChange)
    {
    char *org = cgiOptionalString("org");
    cgiVarSet("db", hDefaultDbForGenome(org));
    }
getDbAndGenome(cart, &db, &organism, oldVars);
/* Keep the database as it was before findClosestServer() may change it. */
oldDb = cloneString(db);
findClosestServer(&db, &organism);

/* The sequence comes from the cart variable userSeq or, when that is
 * empty, from seqFile.  "Clear" blanks both first. */
if (clearUserSeq)
    {
    cartSetString(cart, "userSeq", "");
    cartSetString(cart, "seqFile", "");
    }
userSeq = cartUsualString(cart, "userSeq", "");
if (isEmpty(userSeq))
    userSeq = cartOptionalString(cart, "seqFile");

if (!isEmpty(userSeq) && !orgChange)
    {
    blatSeq(skipLeadingSpaces(userSeq), organism);
    return;
    }

/* No usable sequence (or organism change): show the entry form. */
cartWebStart(theCart, db, "%s BLAT Search", trackHubSkipHubName(organism));
if (differentString(oldDb, db))
    {
    printf("<HR><P><EM><B>Note:</B> BLAT search is not available for %s %s; "
           "defaulting to %s %s</EM></P><HR>\n",
           hGenome(oldDb), hFreezeDate(oldDb), organism, hFreezeDate(db));
    }
askForSeq(organism,db);
cartWebEnd();
}
Esempio n. 16
0
void loadDatabase(char *database, char *track, char *tabName)
/* Load up database from tab-file. */
{
struct sqlConnection *conn = sqlConnect(database);
struct dyString *dy = newDyString(1024);


if (!cgiBoolean("add"))
    {
    sqlDyStringPrintf(dy, createString, track);
    sqlRemakeTable(conn, track, dy->string);
    dyStringClear(dy);
    }
sqlDyStringPrintf(dy, "load data local infile '%s' into table %s",
	tabName, track);
sqlUpdate(conn, dy->string);
dyStringFree(&dy);
sqlDisconnect(&conn);
}
int main(int argc, char *argv[])
/* Entry point.  Loads the graphs named by the first argument, reads the
 * options (bedFile, minConf, db, faFile, estPrior, minSize, mrnaFilter)
 * through the cgi routines, runs countCassetteExons() with the optional
 * fasta and bed outputs and reports the count through warn(). */
{
if(argc < 4)
    usage();
cgiSpoof(&argc, argv);

warn("Loading graphs.");
struct altGraphX *agList = altGraphXLoadAll(argv[1]);

/* Options, read in a fixed order. */
char *bedFileName = cgiOptionalString("bedFile");
float minConfidence = cgiDouble("minConf");
db = cgiString("db");
char *faFile = cgiOptionalString("faFile");
float estPrior = cgiOptionalDouble("estPrior", 10);
int minSize = cgiOptionalInt("minSize", 0);
boolean mrnaFilter = cgiBoolean("mrnaFilter");
if(mrnaFilter)
    {
    loadMrnaHash();
    }

warn("Counting cassette exons from %d clusters above confidence: %f", slCount(agList), minConfidence);

/* Optional bed output, starting with the command state and a track line. */
FILE *bedOut = NULL;
if(bedFileName)
    {
    bedOut = mustOpen(bedFileName, "w");
    printCommandState(argc, argv, bedOut);
    fprintf(bedOut, "track name=cass_conf-%4.2f_est-%3.2f description=\"spliceStats minConf=%4.2f estPrior=%3.2f minSize=%d\"\n", 
            minConfidence, estPrior, minConfidence, estPrior, minSize);
    }

/* Optional fasta output. */
FILE *faOut = NULL;
if(faFile)
    {
    faOut = mustOpen(faFile, "w");
    }

int cassetteCount = countCassetteExons(agList, minConfidence, faOut,bedOut );
carefulClose(&faOut);
carefulClose(&bedOut);
warn("%d cassette exons out of %d clusters in %s", cassetteCount, slCount(agList), argv[1]);
altGraphXFreeList(&agList);
return 0;
}
Esempio n. 18
0
void submitUrl(struct sqlConnection *conn)
/* Submit validated manifest if it is not already in process.  Show
 * progress once it is in progress.
 *   conn - database connection passed to the edw routines and to
 *          monitorSubmission().
 * Reads the "url" and "update" cgi variables and the file-level
 * userEmail.  Aborts through errAbort() when the URL has no "://". */
{
/* Get the URL out of the CGI vars (the email comes from userEmail, not
 * from here). Do a tiny bit of error checking. */
char *url = trimSpaces(cgiString("url"));
if (!stringIn("://", url))
    errAbort("%s doesn't seem to be a valid URL, no '://'", url);

/* Do some reality checks that email and URL actually exist.  The
 * descriptor from opening the URL is only used as a probe and is closed
 * again right away. */
edwMustGetUserFromEmail(conn, userEmail);
int sd = netUrlMustOpenPastHeader(url);
close(sd);

/* Queue the job; the last argument is the "update" flag from the request. */
edwAddSubmitJob(conn, userEmail, url, cgiBoolean("update"));

/* Give the system a moment to react and then put up status info about submission.
 * NOTE(review): this comment used to say "half second", but the argument is
 * 1000; if sleep1000() takes milliseconds that is a full second -- confirm. */
sleep1000(1000);
monitorSubmission(conn);
}
Esempio n. 19
0
void doMatrixPlot(struct dyString *script, 
                  char *skipPSet, char *tableName, char *type,
                  boolean linesOnly)
/* Add a matrix plot for tableName to the page and to the script.
 * A blank temp file (.pdf when the "pdf" cgi variable is set, .png
 * otherwise) is created and linked from the page, the table data is
 * fetched with the query built by constructQueryForEvent(), and
 * plotMatrixRows() is given the data, a "<gene> - <type>" title and
 * the temp file name. */
{
struct tempName plotFile;
struct dyString *query = newDyString(256);
struct dMatrix *dM = NULL;
char title[256];
char *extension;

safef(title, sizeof(title), "%s - %s", altEvent->geneName, type);

/* Choose the temp file suffix to match the output device. */
extension = cgiBoolean("pdf") ? ".pdf" : ".png";
makeTempName(&plotFile, "sp", extension);
touchBlank(plotFile.forCgi);
makePlotLink(type, plotFile.forCgi);

constructQueryForEvent(query, skipPSet, tableName);
dM = dataFromTable(tableName, query->string);
plotMatrixRows(dM, script, title, plotFile.forCgi, type, linesOnly) ;
}
Esempio n. 20
0
void intronSizes(char *database, char *table)
/* intronSizes - Output list of intron sizes.
 * Every row of table (restricted to chromName when that is set, and to
 * rows with txStart != cdsStart when the "withUtr" cgi variable is set)
 * is loaded as a genePred; the beds produced by genePredIntrons() are
 * written to stdout with bedTabOutN(..., 6, ...). */
{
    struct dyString *query = newDyString(1024);
    struct sqlConnection *conn;
    struct sqlResult *sr;
    char **row;
    struct genePred *gp;
    int rowOffset;
    struct bed *bedList = NULL, *bed = NULL;

    hSetDb(database);
    rowOffset = hOffsetPastBin(NULL, table);
    conn = hAllocConn(database);

    /* Build the whole statement with sqlDyStringPrintf so that chromName
     * goes through the same formatting routine as the table name. */
    sqlDyStringPrintf(query, "select * from %s", table);
    if (chromName != NULL)
        sqlDyStringPrintf(query, " where chrom = '%s'", chromName);
    if (cgiBoolean("withUtr"))
    {
        /* First condition needs "where", a second one "and". */
        if (chromName == NULL)
            sqlDyStringPrintf(query, " where txStart != cdsStart");
        else
            sqlDyStringPrintf(query, " and txStart != cdsStart");
    }
    sr = sqlGetResult(conn, query->string);
    while ((row = sqlNextRow(sr)) != NULL)
    {
        gp = genePredLoad(row+rowOffset);
        genePredIntrons(gp, &bedList);
        slReverse(&bedList);
        for (bed = bedList ; bed != NULL ; bed=bed->next)
            bedTabOutN(bed,6, stdout);
        bedFreeList(&bedList);
        genePredFree(&gp);
    }
    sqlFreeResult(&sr);
    hFreeConn(&conn);
    dyStringFree(&query);
}
Esempio n. 21
0
static double intronEndsPercent(int count, int total)
/* Return count as a percentage of total, or 0 when total is 0. */
{
if (total <= 0)
    return 0.0;
return 100.0*count/total;
}

void intronEnds(char *database, char *table)
/* intronEnds - Gather stats on intron ends.
 * For every intron of every genePred row in table (restricted to
 * chromName when set, and to rows with txStart != cdsStart when the
 * "withUtr" cgi variable is set) the first two and last two bases are
 * collected, reverse-complemented for '-' strand genes, and counted as
 * gt/ag, gc/ag, at/ac or ct/ac.  Counts and percentages are printed to
 * stdout. */
{
struct dyString *query = newDyString(1024);
struct sqlConnection *conn;
struct sqlResult *sr;
char **row;
struct genePred *gp;
int total = 0;
int gtag = 0;
int gcag = 0;
int atac = 0;
int ctac = 0;
DNA ends[4];
int exonIx, txStart;
struct dnaSeq *seq;
int rowOffset;
char strand;

rowOffset = hOffsetPastBin(database, NULL, table);
conn = hAllocConn(database);

/* Build the whole statement with sqlDyStringPrintf so that chromName
 * goes through the same formatting routine as the table name. */
sqlDyStringPrintf(query, "select * from %s", table);
if (chromName != NULL)
    sqlDyStringPrintf(query, " where chrom = '%s'", chromName);
if (cgiBoolean("withUtr"))
    {
    /* First condition needs "where", a second one "and". */
    if (chromName == NULL)
        sqlDyStringPrintf(query, " where txStart != cdsStart");
    else
        sqlDyStringPrintf(query, " and txStart != cdsStart");
    }
sr = sqlGetResult(conn, query->string);
while ((row = sqlNextRow(sr)) != NULL)
    {
    gp = genePredLoad(row+rowOffset);
    strand = gp->strand[0];
    txStart = gp->txStart;
    /* Sequence of the whole transcript; offsets below are relative to txStart. */
    seq = hDnaFromSeq(database, gp->chrom, txStart, gp->txEnd, dnaLower);
    for (exonIx=1; exonIx < gp->exonCount; ++exonIx)
        {
        ++total;
        /* Two bases after the previous exon, two bases before this one. */
        memcpy(ends, seq->dna + gp->exonEnds[exonIx-1] - txStart, 2);
        memcpy(ends+2, seq->dna + gp->exonStarts[exonIx] - txStart - 2, 2);
        if (strand == '-')
            reverseComplement(ends, 4);
        if (ends[0] == 'g' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'g')
           ++gtag;
        if (ends[0] == 'g' && ends[1] == 'c' && ends[2] == 'a' && ends[3] == 'g')
           ++gcag;
        if (ends[0] == 'a' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c')
           ++atac;
        if (ends[0] == 'c' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c')
           ++ctac;
        }
    freeDnaSeq(&seq);
    genePredFree(&gp);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
dyStringFree(&query);

/* With no introns at all the percentages are reported as 0 rather than
 * dividing by zero. */
printf("gt/ag %d (%4.2f)\n", gtag, intronEndsPercent(gtag, total));
printf("gc/ag %d (%4.2f)\n", gcag, intronEndsPercent(gcag, total));
printf("at/ac %d (%4.2f)\n", atac, intronEndsPercent(atac, total));
printf("ct/ac %d (%4.2f)\n", ctac, intronEndsPercent(ctac, total));
printf("Total %d\n", total);
}
Esempio n. 22
0
void doMiddle()
/* Write the sequence page for the "geneName" cgi variable.
 * The name is resolved in this order: nameless cluster, chromosome
 * range, gene (getWormGeneDna), cDNA (wormCdnaSeq), and finally
 * anything wormGeneRange() can find.  For genes the "litLink" cgi
 * variable adds a block of information and literature links plus a
 * translated protein section.  Other cgi variables read:
 * intronsLowerCase, intronsParenthesized, startRange/endRange,
 * hiliteNear and strand.  Aborts through errAbort() when the name
 * cannot be found. */
{
char *seqName;
boolean intronsLowerCase = TRUE;
boolean intronsParenthesized = FALSE;
boolean hiliteNear = FALSE;
int startRange = 0;
int endRange = 0;
boolean gotRange = FALSE;
struct dnaSeq *cdnaSeq;
boolean isChromRange = FALSE;
DNA *dna;
char *translation = NULL;

seqName = cgiString("geneName");
seqName = trimSpaces(seqName);
if (cgiVarExists("intronsLowerCase"))
    intronsLowerCase = cgiBoolean("intronsLowerCase");
if (cgiVarExists("intronsParenthesized"))
    intronsParenthesized = cgiBoolean("intronsParenthesized");
if (cgiVarExists("startRange") && cgiVarExists("endRange" ))
    {
    startRange = cgiInt("startRange");
    endRange = cgiInt("endRange");
    gotRange = TRUE;
    }
if (cgiVarExists("hiliteNear"))
    {
    hiliteNear = TRUE;
    }
fprintf(stdout, "<P><TT>\n");

/* The logic here is a little complex to optimize speed.
 * If we can decide what type of thing the name refers to by
 * simply looking at the name we do.  Otherwise we have to
 * search the database in various ways until we get a hit. */
if (wormIsNamelessCluster(seqName))
    {
    isChromRange = TRUE;
    }
else if (wormIsChromRange(seqName))
    {
    isChromRange = TRUE;
    }
else if (getWormGeneDna(seqName, &dna, TRUE))
    {
    if (cgiBoolean("litLink"))
        {
        char nameBuf[64];
        char *geneName = NULL;
        char *productName = NULL;
        char *coding;
        int transSize;
        struct wormCdnaInfo info;

        printf("<H3>Information and Links for %s</H3>\n", seqName);
        if (wormInfoForGene(seqName, &info))
            {
            if (info.description)
                printf("<P>%s</P>\n", info.description);
            geneName = info.gene;
            productName = info.product;
            }
        else
            {
            if (wormIsGeneName(seqName))
                geneName = seqName;
            else if (wormGeneForOrf(seqName, nameBuf, sizeof(nameBuf)))
                geneName = nameBuf;
            }

        /* Translate the upper-case part of the gene.  The buffer is
         * allocated with the same size that is passed to
         * dnaTranslateSome() so the two can never disagree. */
        coding = cloneUpperOnly(dna);
        transSize = 1 + (strlen(coding)+2)/3;
        translation = needMem(transSize);
        dnaTranslateSome(coding, translation, transSize);
        freez(&coding);

        if (geneName)
            {
            printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?form=4&db=m"
                    "&term=C+elegans+%s&dispmax=50&relentrezdate=No+Limit\">", geneName);
            printf("PubMed search on gene: </A>%s<BR>\n", geneName);
            }
        if (productName)
            {
            char *encoded = cgiEncode(productName);
            printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?form=4&db=m"
                    "&term=%s&dispmax=50&relentrezdate=No+Limit\">", encoded);
            printf("PubMed search on product:</A> %s<BR>\n", productName);
            freeMem(encoded);
            }
        /* Process name to get rid of isoform letter for Proteome.
         * The copies into nameBuf are bounded because the names can come
         * straight from the request, and the copy is skipped when
         * geneName already is nameBuf (filled by wormGeneForOrf above). */
        if (geneName)
            {
            if (geneName != nameBuf)
                snprintf(nameBuf, sizeof(nameBuf), "%s", geneName);
            }
        else
            {
            snprintf(nameBuf, sizeof(nameBuf), "%s", seqName);
#ifdef NEVER
            /* Sometimes Proteome requires the letter after the orf name
             * in alt-spliced cases, sometimes it can't handle it.... */
            nameLen = strlen(nameBuf);
            if (wormIsOrfName(nameBuf) && isalpha(nameBuf[nameLen-1]))
                {
                char *dotPos = strrchr(nameBuf, '.');
                if (dotPos != NULL && isdigit(dotPos[1]))
                    nameBuf[nameLen-1] = 0;
                }
#endif /* NEVER */
            }
        printf("<A HREF=\"http://www.wormbase.org/db/seq/sequence?name=%s;class=Sequence\">", seqName);
        printf("WormBase link on:</A> %s<BR>\n", seqName);
        printf("<A HREF=\"http://www.proteome.com/databases/WormPD/reports/%s.html\">", nameBuf);
        printf("Proteome link on:</A> %s<BR>\n<BR>\n", nameBuf);


        printf("<A HREF=#DNA>Genomic DNA Sequence</A><BR>\n");
        if (hiliteNear)
            printf("<A HREF=\"#CLICKED\">Shortcut to where you clicked in gene</A><BR>");
        printf("<A HREF=#protein>Translated Protein Sequence</A><BR>\n");
        htmlHorizontalLine();
        printf("<A NAME=DNA></A>");
        printf("<H3>%s Genomic DNA sequence</H3>", seqName);
        }
    if (!intronsLowerCase)
        tolowers(dna);
    if (hiliteNear)
        {
        if (!gotRange)
            {
            /* No explicit range: highlight a small window around the
             * relative position given by hiliteNear, clipped to the
             * sequence. */
            double nearPos = cgiDouble("hiliteNear");
            int rad = 5;
            int dnaSize = strlen(dna);
            long mid = (int)(dnaSize * nearPos);
            startRange = mid - rad;
            if (startRange < 0) startRange = 0;
            endRange = mid + rad;
            if (endRange >= dnaSize) endRange = dnaSize - 1;
            }
        }
    outputSeq(dna, strlen(dna), hiliteNear, startRange, endRange, stdout);
    freez(&dna);
    }
else if (wormCdnaSeq(seqName, &cdnaSeq, NULL))
    {
    outputSeq(cdnaSeq->dna, cdnaSeq->size, FALSE, 0, 0, stdout);
    }
else
    {
    isChromRange = TRUE;
    }
if (isChromRange)
    {
    char *chromId;
    int start, end;
    char strand = '+';
    int size;

    if (!wormGeneRange(seqName, &chromId, &strand, &start, &end))
        errAbort("Can't find %s",seqName);
    size = end - start;
    if (intronsLowerCase)
        dna = wormChromPartExonsUpper(chromId, start, size);
    else
        {
        dna = wormChromPart(chromId, start, size);
        touppers(dna);
        }
    /* An explicit "strand" variable overrides the looked-up strand. */
    if (cgiVarExists("strand"))
        strand = cgiString("strand")[0];
    if (strand == '-')
        reverseComplement(dna, size);
    outputSeq(dna, size, FALSE, 0, 0, stdout);
    }
if (translation != NULL)
    {
    htmlHorizontalLine();
    printf("<A NAME=protein></A>");
    printf("<H3>Translated Protein of %s</H3>\n", seqName);
    outputSeq(translation, strlen(translation), FALSE, 0, 0, stdout);
    freez(&translation);
    }
fprintf(stdout, "</TT></P>\n");

}
Esempio n. 23
0
static void faNcbiToUcscOpenSplit(char *outDir, char *name, FILE **pFile)
/* Close *pFile if it is open and open outDir/name.fa for writing in
 * its place.  Aborts when the resulting path does not fit. */
{
char outName[512];
int len = snprintf(outName, sizeof(outName), "%s/%s.fa", outDir, name);
if (len < 0 || len >= (int)sizeof(outName))
    errAbort("Output file name for '%s' in %s is too long", name, outDir);
carefulClose(pFile);
*pFile = mustOpen(outName, "w");
}

void faNcbiToUcsc(char *inFile, char *out)
/* faNcbiToUcsc - Convert FA file from NCBI to UCSC format.
 *   inFile - fasta file to read.
 *   out    - output file, or output directory when "split" is set.
 * Header lines are rewritten to ">name"; all other lines are copied.
 * The name is chosen according to the cgi options:
 *   ntLast     - the text from the first NT_, NG_ or NC_ up to the next
 *                '|' or space.
 *   encode     - the first word after "|EN" (starting at the 'E').
 *   otherwise  - the '|'-separated word number wordIx, or the word after
 *                wordBefore (default "gb"), with its suffix chopped.
 * With "split" every record goes to its own out/name.fa file. */
{
struct lineFile *lf = lineFileOpen(inFile, TRUE);
char *line;
boolean split = cgiBoolean("split");
boolean ntLast = cgiBoolean("ntLast");
boolean encode = cgiBoolean("encode");
FILE *f = NULL;
char *wordBefore = cgiUsualString("wordBefore", "gb");
int wordIx = cgiUsualInt("wordIx", -1);
char *e = NULL;
char *nt = NULL;

if (split)
    makeDir(out);
else
    f = mustOpen(out, "w");
while (lineFileNext(lf, &line, NULL))
    {
    if (line[0] == '>')
        {
        if (ntLast || encode)
            {
            nt = NULL;
            if (ntLast)
                {
                nt = stringIn("NT_", line);
                if (nt == NULL)
                    nt = stringIn("NG_", line);
                if (nt == NULL)
                    nt = stringIn("NC_", line);
                if (nt == NULL)
                    errAbort("Expecting NT_ NG_ or NC_in '%s'", line);
                /* Cut the name at the first '|' and then at the first space. */
                e = strchr(nt, '|');
                if (e != NULL) *e = 0;
                e = strchr(nt, ' ');
                if (e != NULL) *e = 0;
                }
            else 
                {
                nt = stringIn("|EN", line);
                if (nt == NULL)
                    errAbort("Expecting EN in %s", line);
                nt++;	/* Skip the '|'. */
                nt = firstWordInLine(nt);
                }
            if (split)
                faNcbiToUcscOpenSplit(out, nt, &f);
            fprintf(f, ">%s\n", nt);
            }
        else
            {
            char *words[32];
            int wordCount, i;
            char *accession = NULL;
            wordCount = chopString(line+1, "|", words, ArraySize(words));
            if (wordIx >= 0)
                {
                if (wordIx >= wordCount)
                    errAbort("Sorry only %d words", wordCount);
                accession = words[wordIx];
                }
            else
                {
                for (i=0; i<wordCount-1; ++i)
                    {
                    if (sameString(words[i], wordBefore))
                        {
                        accession = words[i+1];
                        break;
                        }
                    }
                if (accession == NULL)
                    errAbort("Couldn't find '%s' line %d of %s", 
                            wordBefore, lf->lineIx, lf->fileName);
                }
            chopSuffix(accession);
            /* In split mode no file is open yet on this path; open one
             * per record just like the ntLast/encode path does. */
            if (split)
                faNcbiToUcscOpenSplit(out, accession, &f);
            fprintf(f, ">%s\n", accession);
            }
        }
    else
        {
        /* Sequence data before any header has nowhere to go in split mode. */
        if (f == NULL)
            errAbort("Sequence before first '>' line %d of %s",
                    lf->lineIx, lf->fileName);
        fprintf(f, "%s\n", line);
        }
    }
carefulClose(&f);
lineFileClose(&lf);
}
static void hgSeqConcatRegionsDb(char *db, char *chrom, int chromSize, char strand, char *name,
                                 int rCount, unsigned *rStarts, unsigned *rSizes,
                                 boolean *exonFlags, boolean *cdsFlags)
/* Concatenate and print out dna for a series of regions.
 *   db, chrom            - where the sequence is fetched from.
 *   chromSize            - when > 0, padding is not allowed to run past it.
 *   strand               - '-' requests the reverse complement (so does
 *                          the hgSeq.revComp cgi variable).
 *   name                 - used in the FASTA record name.
 *   rCount               - number of regions; nothing is done when < 1.
 *   rStarts, rSizes      - start and size of each region.
 *   exonFlags, cdsFlags  - per-region flags consulted for the "exon" and
 *                          "cds" casing modes.
 * Reads the cgi variables hgSeq.revComp, hgSeq.maskRepeats,
 * hgSeq.padding5, hgSeq.padding3, hgSeq.casing, hgSeq.repMasking and
 * hgSeq.granularity.  Writes one FASTA record to stdout, or a "# Null
 * range" comment line when the range is empty. */
{
// Note: this code used to generate different sequence ids if the global
// database in hdb was different than the db parameter.  This functionality
// has been removed since the global database was removed and it didn't
// appear to be used.

    struct dnaSeq *rSeq = NULL;
    struct dnaSeq *cSeq = NULL;
    char recName[256];
    int seqStart, seqEnd;
    int offset, cSize;
    int i;
    boolean isRc     = (strand == '-') || cgiBoolean("hgSeq.revComp");
    boolean maskRep  = cgiBoolean("hgSeq.maskRepeats");
    int padding5     = cgiOptionalInt("hgSeq.padding5", 0);
    int padding3     = cgiOptionalInt("hgSeq.padding3", 0);
    char *casing     = cgiString("hgSeq.casing");
    char *repMasking = cgiString("hgSeq.repMasking");
    char *granularity  = cgiOptionalString("hgSeq.granularity");
    boolean concatRegions = granularity && sameString("gene", granularity);

    if (rCount < 1)
        return;

    /* Don't support padding if granularity is gene (i.e. concat'ing all). */
    if (concatRegions)
    {
        padding5 = padding3 = 0;
    }

    /* Overall range: from the first region's start to the last region's
     * end, widened by the padding.  On the reverse strand the 3' padding
     * sits before the start and the 5' padding after the end. */
    i = rCount - 1;
    seqStart = rStarts[0]             - (isRc ? padding3 : padding5);
    seqEnd   = rStarts[i] + rSizes[i] + (isRc ? padding5 : padding3);
    /* Padding might push us off the edge of the chrom; if so, truncate: */
    if (seqStart < 0)
    {
        /* seqStart is negative here, so this shrinks the padding. */
        if (isRc)
            padding3 += seqStart;
        else
            padding5 += seqStart;
        seqStart = 0;
    }

    /* if we know the chromSize, don't pad out beyond it */
    if ((chromSize > 0) && (seqEnd > chromSize))
    {
        /* (chromSize - seqEnd) is negative here, so this shrinks the padding. */
        if (isRc)
            padding5 += (chromSize - seqEnd);
        else
            padding3 += (chromSize - seqEnd);
        seqEnd = chromSize;
    }
    if (seqEnd <= seqStart)
    {
        printf("# Null range for %s_%s (range=%s:%d-%d 5'pad=%d 3'pad=%d) (may indicate a query-side insert)\n",
               db,
               name,
               chrom, seqStart+1, seqEnd,
               padding5, padding3);
        return;
    }
    /* Fetch the whole range once, in the case requested. */
    if (maskRep)
    {
        rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaMixed);
        if (sameString(repMasking, "N"))
            lowerToN(rSeq->dna, strlen(rSeq->dna));
        if (!sameString(casing, "upper"))
            tolowers(rSeq->dna);
    }
    else if (sameString(casing, "upper"))
        rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaUpper);
    else
        rSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaLower);

    /* Handle casing and compute size of concatenated sequence */
    cSize = 0;
    for (i=0;  i < rCount;  i++)
    {
        if ((sameString(casing, "exon") && exonFlags[i]) ||
                (sameString(casing, "cds") && cdsFlags[i]))
        {
            int rStart = rStarts[i] - seqStart;
            toUpperN(rSeq->dna+rStart, rSizes[i]);
        }
        cSize += rSizes[i];
    }
    cSize += (padding5 + padding3);
    AllocVar(cSeq);
    cSeq->dna = needLargeMem(cSize+1);
    cSeq->size = cSize;

    /* Copy the regions one after another into cSeq; the first region is
     * extended backwards and the last one forwards by the padding. */
    offset = 0;
    for (i=0;  i < rCount;  i++)
    {
        int start = rStarts[i] - seqStart;
        int size  = rSizes[i];
        if (i == 0)
        {
            start -= (isRc ? padding3 : padding5);
            assert(start == 0);
            size  += (isRc ? padding3 : padding5);
        }
        if (i == rCount-1)
        {
            size  += (isRc ? padding5 : padding3);
        }
        memcpy(cSeq->dna+offset, rSeq->dna+start, size);
        offset += size;
    }
    assert(offset == cSeq->size);
    cSeq->dna[offset] = 0;
    freeDnaSeq(&rSeq);

    if (isRc)
        reverseComplement(cSeq->dna, cSeq->size);

    /* Record name reports the 1-based start of the fetched range. */
    safef(recName, sizeof(recName),
          "%s_%s range=%s:%d-%d 5'pad=%d 3'pad=%d "
          "strand=%c repeatMasking=%s",
          db,
          name,
          chrom, seqStart+1, seqEnd,
          padding5, padding3,
          (isRc ? '-' : '+'),
          (maskRep ? repMasking : "none"));
    faWriteNext(stdout, recName, cSeq->dna, cSeq->size);
    freeDnaSeq(&cSeq);
}
Esempio n. 25
0
void blatSeq(char *userSeq, char *organism)
/* Blat sequence user pasted in.
 *   userSeq  - text of the sequence(s) entered by the user.
 *   organism - organism name used in the page title and passed on to
 *              showAliPlaces().
 * The "type" cgi variable selects DNA, translated RNA/DNA or protein
 * handling; anything else makes the routine guess from the sequence.
 * Sequences are written to a temporary fasta file, aligned one by one
 * against the server found by findServer(), and the resulting pslx file
 * is handed to showAliPlaces().  Limits on count, single size and total
 * size are enforced with warnings.  With the "Lucky" cgi variable only
 * the first sequence is used and no page frame is written here. */
{
FILE *f;
struct dnaSeq *seqList = NULL, *seq;
struct tempName pslTn, faTn;
int maxSingleSize, maxTotalSize, maxSeqCount;
int minSingleSize = minMatchShown;
char *genome, *db;
char *type = cgiString("type");
char *seqLetters = cloneString(userSeq);
struct serverTable *serve;
int conn;
int oneSize, totalSize = 0, seqCount = 0;
boolean isTx = FALSE;
boolean isTxTx = FALSE;
boolean txTxBoth = FALSE;
struct gfOutput *gvo;
boolean qIsProt = FALSE;
enum gfType qType, tType;
struct hash *tFileCache = gfFileCacheNew();
boolean feelingLucky = cgiBoolean("Lucky");

getDbAndGenome(cart, &db, &genome, oldVars);
if(!feelingLucky)
    cartWebStart(cart, db, "%s BLAT Results",  trackHubSkipHubName(organism));
/* Load user sequence and figure out if it is DNA or protein. */
if (sameWord(type, "DNA"))
    {
    seqList = faSeqListFromMemText(seqLetters, TRUE);
    uToT(seqList);
    isTx = FALSE;
    }
else if (sameWord(type, "translated RNA") || sameWord(type, "translated DNA"))
    {
    seqList = faSeqListFromMemText(seqLetters, TRUE);
    uToT(seqList);
    isTx = TRUE;
    isTxTx = TRUE;
    txTxBoth = sameWord(type, "translated DNA");
    }
else if (sameWord(type, "protein"))
    {
    seqList = faSeqListFromMemText(seqLetters, FALSE);
    isTx = TRUE;
    qIsProt = TRUE;
    }
else 
    {
    /* Type not given explicitly: decide from the sequence itself and
     * filter it accordingly. */
    seqList = faSeqListFromMemTextRaw(seqLetters);
    isTx = !seqIsDna(seqList);
    if (!isTx)
        {
        for (seq = seqList; seq != NULL; seq = seq->next)
            {
            seq->size = dnaFilteredSize(seq->dna);
            dnaFilter(seq->dna, seq->dna);
            toLowerN(seq->dna, seq->size);
            subChar(seq->dna, 'u', 't');
            }
        }
    else
        {
        for (seq = seqList; seq != NULL; seq = seq->next)
            {
            seq->size = aaFilteredSize(seq->dna);
            aaFilter(seq->dna, seq->dna);
            toUpperN(seq->dna, seq->size);
            }
        qIsProt = TRUE;
        }
    }
/* Give an unnamed first sequence a default name. */
if (seqList != NULL && seqList->name[0] == 0)
    {
    freeMem(seqList->name);
    seqList->name = cloneString("YourSeq");
    }
trimUniq(seqList);

/* If feeling lucky only do the first one. */
if(feelingLucky && seqList != NULL)
    {
    seqList->next = NULL;
    }

/* Figure out size allowed. */
maxSingleSize = (isTx ? 10000 : 75000);
maxTotalSize = maxSingleSize * 2.5;
#ifdef LOWELAB
maxSeqCount = 200;
#else
maxSeqCount = 25;
#endif

/* Create temporary file to store sequence. */
trashDirFile(&faTn, "hgSs", "hgSs", ".fa");
faWriteAll(faTn.forCgi, seqList);

/* Create a temporary .psl file with the alignments against genome. */
trashDirFile(&pslTn, "hgSs", "hgSs", ".pslx");
f = mustOpen(pslTn.forCgi, "w");
gvo = gfOutputPsl(0, qIsProt, FALSE, f, FALSE, TRUE);
serve = findServer(db, isTx);
/* Write header for extended (possibly protein) psl file. */
if (isTx)
    {
    if (isTxTx)
        {
        qType = gftDnaX;
        tType = gftDnaX;
        }
    else
        {
        qType = gftProt;
        tType = gftDnaX;
        }
    }
else
    {
    qType = gftDna;
    tType = gftDna;
    }
pslxWriteHead(f, qType, tType);

/* Recommended minimum length depends on the query type. */
if (qType == gftProt)
    {
    minSingleSize = 14;
    }
else if (qType == gftDnaX)
    {
    minSingleSize = 36;
    }


/* Loop through each sequence. */
for (seq = seqList; seq != NULL; seq = seq->next)
    {
    printf(" "); fflush(stdout);  /* prevent apache cgi timeout by outputting something */
    oneSize = realSeqSize(seq, !isTx);
    if ((seqCount&1) == 0)	// Call bot delay every 2nd time starting with first time
        hgBotDelay();
    if (++seqCount > maxSeqCount)
        {
        /* Report the limit actually in force; it is not always 25. */
        warn("More than %d input sequences, stopping at %s.",
            maxSeqCount, seq->name);
        break;
        }
    if (oneSize > maxSingleSize)
        {
        warn("Sequence %s is %d letters long (max is %d), skipping",
            seq->name, oneSize, maxSingleSize);
        continue;
        }
    if (oneSize < minSingleSize)
        {
        warn("Warning: Sequence %s is only %d letters long (%d is the recommended minimum)", 
                seq->name, oneSize, minSingleSize);
        // we could use "continue;" here to actually enforce skipping, 
        // but let's give the short sequence a chance, it might work.
        // minimum possible length = tileSize+stepSize, so mpl=16 for dna stepSize=5, mpl=10 for protein.
        if (qIsProt && oneSize < 1) // protein does not tolerate oneSize==0
            continue;
        }
    totalSize += oneSize;
    if (totalSize > maxTotalSize)
        {
        warn("Sequence %s would take us over the %d letter limit, stopping here.",
             seq->name, maxTotalSize);
        break;
        }
    conn = gfConnect(serve->host, serve->port);
    if (isTx)
        {
        gvo->reportTargetStrand = TRUE;
        if (isTxTx)
            {
            gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, 
                tFileCache, gvo, !txTxBoth);
            if (txTxBoth)
                {
                /* Second pass on the reverse complement, over a fresh connection. */
                reverseComplement(seq->dna, seq->size);
                conn = gfConnect(serve->host, serve->port);
                gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, 
                        tFileCache, gvo, FALSE);
                }
            }
        else
            {
            gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo);
            }
        }
    else
        {
        /* Plain DNA: align both strands, each over its own connection. */
        gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo);
        reverseComplement(seq->dna, seq->size);
        conn = gfConnect(serve->host, serve->port);
        gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo);
        }
    gfOutputQuery(gvo, f);
    }
carefulClose(&f);
showAliPlaces(pslTn.forCgi, faTn.forCgi, serve->db, qType, tType, 
              organism, feelingLucky);
if(!feelingLucky)
    cartWebEnd();
gfFileCacheFree(&tFileCache);
}
int hgSeqBed(char *db, struct hTableInfo *hti, struct bed *bedList)
/* Print out dna sequence from the given database of all items in bedList.
 * hti describes the bed-compatibility level of bedList items.
 * Returns number of FASTA records printed out.
 *
 * Which regions are selected is driven by the hgSeq.* CGI variables read
 * in the declarations below (promoter, intron, utrExon5, cdsExon, utrExon3,
 * downstream, promoterSize, downstreamSize, granularity, splitCDSUTR).
 *
 * Side effects on bedList: each item's chromStarts[] is converted in place
 * from relative to absolute coordinates, and in the "CDS but no blocks"
 * branch thickStart/thickEnd are overwritten when both are 0.
 *
 * NOTE(review): the value returned is the sum, over all items, of the
 * region count built for that item and handed to hgSeqRegionsAdjDb();
 * whether that always equals the number of FASTA records depends on
 * hgSeqRegionsAdjDb() - confirm. */
{
    struct bed *bedItem;
    char itemName[128];
    boolean isRc;
    int count;
    unsigned *starts = NULL;
    unsigned *sizes = NULL;
    boolean *exonFlags = NULL;
    boolean *cdsFlags = NULL;
    int i, rowCount, totalCount;
    boolean promoter   = cgiBoolean("hgSeq.promoter");
    boolean intron     = cgiBoolean("hgSeq.intron");
    boolean utrExon5   = cgiBoolean("hgSeq.utrExon5");
    /* Each intron flag starts out qualified by its matching exon flag. */
    boolean utrIntron5 = utrExon5 && intron;
    boolean cdsExon    = cgiBoolean("hgSeq.cdsExon");
    boolean cdsIntron  = cdsExon && intron;
    boolean utrExon3   = cgiBoolean("hgSeq.utrExon3");
    boolean utrIntron3 = utrExon3 && intron;
    boolean downstream = cgiBoolean("hgSeq.downstream");
    int promoterSize   = cgiOptionalInt("hgSeq.promoterSize", 0);
    int downstreamSize = cgiOptionalInt("hgSeq.downstreamSize", 0);
    char *granularity  = cgiOptionalString("hgSeq.granularity");
    boolean concatRegions = granularity && sameString("gene", granularity);
    /* TRUE only when hgSeq.splitCDSUTR is present in the CGI and is off. */
    boolean concatAdjacent = (cgiBooleanDefined("hgSeq.splitCDSUTR") &&
                              (! cgiBoolean("hgSeq.splitCDSUTR")));
    boolean isCDS, doIntron;
    boolean canDoUTR, canDoIntrons;

    /* catch a special case: introns selected, but no exons -> include all introns
     * instead of qualifying intron with exon flags. */
    if (intron && !(utrExon5 || cdsExon || utrExon3))
    {
        utrIntron5 = cdsIntron = utrIntron3 = TRUE;
    }

    canDoUTR = hti->hasCDS;
    canDoIntrons = hti->hasBlocks;

    /* rowCount is incremented per item below but never read afterwards. */
    rowCount = totalCount = 0;
    for (bedItem = bedList;  bedItem != NULL;  bedItem = bedItem->next)
    {
        /* Once cleared here, canDoIntrons stays FALSE for all later items too. */
        if (bedItem->blockCount == 0) /* An intersection may have made hti unreliable. */
            canDoIntrons = FALSE;
        rowCount++;
        int chromSize = hgSeqChromSize(db, bedItem->chrom);
        // bed: translate relative starts to absolute starts
        for (i=0;  i < bedItem->blockCount;  i++)
        {
            bedItem->chromStarts[i] += bedItem->chromStart;
        }
        isRc = (bedItem->strand[0] == '-');
        // here's the max # of feature regions:
        if (canDoIntrons)
            count = 4 + (2 * bedItem->blockCount);
        else
            count = 5;
        /* maxStartsOffset is not declared in this function; presumably a
         * file-level bound consulted by addFeature() - TODO confirm. */
        maxStartsOffset = count-1;
        starts    = needMem(sizeof(unsigned) * count);
        sizes     = needMem(sizeof(unsigned) * count);
        exonFlags = needMem(sizeof(boolean) * count);
        cdsFlags  = needMem(sizeof(boolean) * count);
        // build up a list of selected regions
        /* count is reused from here on as the number of regions added so far.
         * NOTE(review): in the addFeature() calls the two boolean arguments
         * before chromSize presumably fill exonFlags[] and cdsFlags[]
         * respectively - confirm against addFeature(). */
        count = 0;
        /* Region immediately before chromStart: promoter on the + strand,
         * downstream on the - strand. */
        if (!isRc && promoter && (promoterSize > 0))
        {
            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                       (bedItem->chromStart - promoterSize), promoterSize,
                       FALSE, FALSE, chromSize);
        }
        else if (isRc && downstream && (downstreamSize > 0))
        {
            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                       (bedItem->chromStart - downstreamSize), downstreamSize,
                       FALSE, FALSE, chromSize);
        }
        if (canDoIntrons && canDoUTR)
        {
            /* Blocks and thick (CDS) bounds are both available: classify each
             * block by its position relative to thickStart/thickEnd. */
            for (i=0;  i < bedItem->blockCount;  i++)
            {
                /* Case 1: block ends at or before thickStart. It is 5' UTR on
                 * the + strand and 3' UTR on the - strand. */
                if ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) <=
                        bedItem->thickStart)
                {
                    if ((!isRc && utrExon5)   || (isRc && utrExon3))
                    {
                        addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                   bedItem->chromStarts[i], bedItem->blockSizes[i],
                                   TRUE, FALSE, chromSize);
                    }
                    /* Gap between this block and the next one (none after the last block). */
                    if (((!isRc && utrIntron5) || (isRc && utrIntron3)) &&
                            (i < bedItem->blockCount - 1))
                    {
                        addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                   (bedItem->chromStarts[i] +
                                    bedItem->blockSizes[i]),
                                   (bedItem->chromStarts[i+1] -
                                    bedItem->chromStarts[i] -
                                    bedItem->blockSizes[i]),
                                   FALSE, FALSE, chromSize);
                    }
                }
                /* Case 2: block starts before thickEnd (and, given case 1 failed,
                 * ends after thickStart), so it overlaps the thick region. */
                else if (bedItem->chromStarts[i] < bedItem->thickEnd)
                {
                    /* 2a: block contains both thickStart and thickEnd; split it
                     * into up to three pieces: UTR, CDS, UTR. */
                    if ((bedItem->chromStarts[i] < bedItem->thickStart) &&
                            ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) >
                             bedItem->thickEnd))
                    {
                        if ((!isRc && utrExon5)	  || (isRc && utrExon3))
                        {
                            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                       bedItem->chromStarts[i],
                                       (bedItem->thickStart -
                                        bedItem->chromStarts[i]),
                                       TRUE, FALSE, chromSize);
                        }
                        if (cdsExon)
                        {
                            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                       bedItem->thickStart,
                                       (bedItem->thickEnd - bedItem->thickStart),
                                       TRUE, TRUE, chromSize);
                        }
                        if ((!isRc && utrExon3)	  || (isRc && utrExon5))
                        {
                            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                       bedItem->thickEnd,
                                       (bedItem->chromStarts[i] +
                                        bedItem->blockSizes[i] -
                                        bedItem->thickEnd),
                                       TRUE, FALSE, chromSize);
                        }
                    }
                    /* 2b: block straddles thickStart only; UTR piece then CDS piece. */
                    else if (bedItem->chromStarts[i] < bedItem->thickStart)
                    {
                        if ((!isRc && utrExon5)	  || (isRc && utrExon3))
                        {
                            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                       bedItem->chromStarts[i],
                                       (bedItem->thickStart -
                                        bedItem->chromStarts[i]),
                                       TRUE, FALSE, chromSize);
                        }
                        if (cdsExon)
                        {
                            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                       bedItem->thickStart,
                                       (bedItem->chromStarts[i] +
                                        bedItem->blockSizes[i] -
                                        bedItem->thickStart),
                                       TRUE, TRUE, chromSize);
                        }
                    }
                    /* 2c: block straddles thickEnd only; CDS piece then UTR piece. */
                    else if ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) >
                             bedItem->thickEnd)
                    {
                        if (cdsExon)
                        {
                            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                       bedItem->chromStarts[i],
                                       (bedItem->thickEnd -
                                        bedItem->chromStarts[i]),
                                       TRUE, TRUE, chromSize);
                        }
                        if ((!isRc && utrExon3)	  || (isRc && utrExon5))
                        {
                            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                       bedItem->thickEnd,
                                       (bedItem->chromStarts[i] +
                                        bedItem->blockSizes[i] -
                                        bedItem->thickEnd),
                                       TRUE, FALSE, chromSize);
                        }
                    }
                    /* 2d: block lies entirely within [thickStart, thickEnd]. */
                    else if (cdsExon)
                    {
                        addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                   bedItem->chromStarts[i], bedItem->blockSizes[i],
                                   TRUE, TRUE, chromSize);
                    }
                    /* The gap after this block counts as CDS unless the block
                     * extends past thickEnd; otherwise it is the UTR on the
                     * thickEnd side (3' on +, 5' on -). */
                    isCDS = ! ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) >
                               bedItem->thickEnd);
                    doIntron = (isCDS ? cdsIntron :
                                ((!isRc) ? utrIntron3 : utrIntron5));
                    if (doIntron && (i < bedItem->blockCount - 1))
                    {
                        addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                   (bedItem->chromStarts[i] +
                                    bedItem->blockSizes[i]),
                                   (bedItem->chromStarts[i+1] -
                                    bedItem->chromStarts[i] -
                                    bedItem->blockSizes[i]),
                                   FALSE, isCDS, chromSize);
                    }
                }
                /* Case 3: block starts at or after thickEnd. It is 3' UTR on
                 * the + strand and 5' UTR on the - strand. */
                else
                {
                    if ((!isRc && utrExon3)   || (isRc && utrExon5))
                    {
                        addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                   bedItem->chromStarts[i], bedItem->blockSizes[i],
                                   TRUE, FALSE, chromSize);
                    }
                    if (((!isRc && utrIntron3) || (isRc && utrIntron5)) &&
                            (i < bedItem->blockCount - 1))
                    {
                        addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                                   (bedItem->chromStarts[i] +
                                    bedItem->blockSizes[i]),
                                   (bedItem->chromStarts[i+1] -
                                    bedItem->chromStarts[i] -
                                    bedItem->blockSizes[i]),
                                   FALSE, FALSE, chromSize);
                    }
                }
            }
        }
        else if (canDoIntrons)
        {
            /* Blocks but no CDS info: the cdsExon / cdsIntron flags select
             * whole blocks and the gaps between them; every region is added
             * with FALSE in the second boolean argument. */
            for (i=0;  i < bedItem->blockCount;  i++)
            {
                if (cdsExon)
                {
                    addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                               bedItem->chromStarts[i], bedItem->blockSizes[i],
                               TRUE, FALSE, chromSize);
                }
                if (cdsIntron && (i < bedItem->blockCount - 1))
                {
                    addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                               (bedItem->chromStarts[i] + bedItem->blockSizes[i]),
                               (bedItem->chromStarts[i+1] -
                                bedItem->chromStarts[i] -
                                bedItem->blockSizes[i]),
                               FALSE, FALSE, chromSize);
                }
            }
        }
        else if (canDoUTR)
        {
            /* CDS info but no usable blocks: split the item's whole span at
             * thickStart and thickEnd. When both are 0 they are reset to
             * chromStart (this modifies bedItem), which makes the CDS piece
             * empty. */
            if (bedItem->thickStart == 0 && bedItem->thickEnd == 0)
                bedItem->thickStart = bedItem->thickEnd = bedItem->chromStart;
            if ((!isRc && utrExon5)   || (isRc && utrExon3))
            {
                addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                           bedItem->chromStart,
                           (bedItem->thickStart - bedItem->chromStart),
                           TRUE, FALSE, chromSize);
            }
            if (cdsExon)
            {
                addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                           bedItem->thickStart,
                           (bedItem->thickEnd - bedItem->thickStart),
                           TRUE, TRUE, chromSize);
            }
            if ((!isRc && utrExon3)   || (isRc && utrExon5))
            {
                addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                           bedItem->thickEnd,
                           (bedItem->chromEnd - bedItem->thickEnd),
                           TRUE, FALSE, chromSize);
            }
        }
        else
        {
            /* Neither blocks nor CDS info: the whole item is one region. */
            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                       bedItem->chromStart,
                       (bedItem->chromEnd - bedItem->chromStart),
                       TRUE, FALSE, chromSize);
        }
        /* Region immediately after chromEnd: downstream on the + strand,
         * promoter on the - strand. */
        if (!isRc && downstream && (downstreamSize > 0))
        {
            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                       bedItem->chromEnd, downstreamSize, FALSE, FALSE, chromSize);
        }
        else if (isRc && promoter && (promoterSize > 0))
        {
            addFeature(&count, starts, sizes, exonFlags, cdsFlags,
                       bedItem->chromEnd, promoterSize, FALSE, FALSE, chromSize);
        }
        /* Item label is "<rootName>_<name>"; snprintf bounds it to itemName's 128 bytes. */
        snprintf(itemName, sizeof(itemName), "%s_%s", hti->rootName, bedItem->name);
        hgSeqRegionsAdjDb(db, bedItem->chrom, chromSize, bedItem->strand[0], itemName,
                          concatRegions, concatAdjacent,
                          count, starts, sizes, exonFlags, cdsFlags);
        totalCount += count;
        /* The four per-item arrays are allocated and released on every iteration. */
        freeMem(starts);
        freeMem(sizes);
        freeMem(exonFlags);
        freeMem(cdsFlags);
    }
    return totalCount;
}
Esempio n. 27
0
void doRegressionPlot(struct dyString *script, char *skipPset, 
		      char *incTable, char *skipTable, char *geneTable)
/* Put up a regression plot. */
{
struct dMatrix *skip = NULL, *inc = NULL, *gene = NULL;
char query[256];
int i = 0;
struct tempName regPlot;
double thresh = .75;
if(cgiBoolean("allPoints"))
    thresh = -1;
if(cgiBoolean("pdf"))
    makeTempName(&regPlot, "sp", ".pdf");
else
    makeTempName(&regPlot, "sp", ".png");
touchBlank(regPlot.forCgi);

sqlSafef(query, sizeof(query), "select * from %s where name like '%%%s%%';", 
      incTable, skipPset);
inc = dataFromTable(incTable, query);

sqlSafef(query, sizeof(query), "select * from %s where name like '%%%s%%';", 
      skipTable, skipPset);
skip = dataFromTable(skipTable, query);

sqlSafef(query, sizeof(query), "select * from %s where name like '%%%s%%';", 
      geneTable, skipPset);
gene = dataFromTable(geneTable, query);

initRegPlotOutput(script, regPlot.forCgi);
dyStringPrintf(script, "incDat = c(");
for(i = 0; i< inc->colCount; i++)
    dyStringPrintf(script, "%.4f,", inc->matrix[0][i]);
dyStringPrintf(script, ");\n");

dyStringPrintf(script, "skipDat = c(");
for(i = 0; i< skip->colCount; i++)
    dyStringPrintf(script, "%.4f,", skip->matrix[0][i]);
dyStringPrintf(script, ");\n");

dyStringPrintf(script, "geneDat = c(");
for(i = 0; i< gene->colCount; i++)
    dyStringPrintf(script, "%.4f,", gene->matrix[0][i]);
dyStringPrintf(script, ");\n");

dyStringPrintf(script, "expressed = geneDat > %.4f;\n", thresh);

dyStringPrintf(script, abbv);
if(cgiBoolean("muscle"))
    {
    dyStringPrintf(script, isMuscle);
    dyStringPrintf(script, plotRegression, altEvent->geneName, "Muscle", "Muscle");
    }
else
    {
    dyStringPrintf(script, isBrain);
    dyStringPrintf(script, plotRegression, altEvent->geneName, "Brain", "Brain");
    }
closePlotOutput(script);
makePlotLink("Include vs. Skip", regPlot.forCgi);
dyStringPrintf(html, "<table width=600><tr><td>\n");
dyStringPrintf(html, "Tissues for which the gene expression was too low to be considered are in lighter colors.<br><br>\n");
dyStringPrintf(html, "%s\n", key);
dyStringPrintf(html, "</td></tr></table><br>\n");
//dyStringPrintf(html, "<img src='%s'><br>\n", regPlot.forCgi);
}