int main(int argc, char *argv[])
/* Process command line: read the option variables into their globals,
 * then pass the two positional arguments on to pslUnpile. */
{
char *firstArg, *secondArg;

cgiSpoof(&argc, argv);
if (argc != 3)
    {
    usage();
    }

/* Option globals. */
doTarget = !cgiBoolean("query");
noHead = (cgiBoolean("nohead") || cgiBoolean("noHead"));   /* Either spelling is accepted. */
maxPile = cgiOptionalInt("maxPile", maxPile);
minPile = cgiOptionalInt("minPile", minPile);

firstArg = argv[1];
secondArg = argv[2];
pslUnpile(firstArg, secondArg);
return 0;
}
int main(int argc, char *argv[])
/* Process command line: load the option globals, then run catDir on
 * everything that follows the program name. */
{
int restCount;
char **rest;

cgiSpoof(&argc, argv);
if (argc < 2)
    {
    usage();
    }

recurse = cgiBoolean("r");
suffix = cgiOptionalString("suffix");
wildCard = cgiOptionalString("wild");
nonz = cgiBoolean("nonz");

/* Skip argv[0]. */
restCount = argc - 1;
rest = argv + 1;
catDir(restCount, rest);
return 0;
}
int main(int argc, char *argv[])
/* Process command line.  The option globals are loaded first (keeping
 * their current values as defaults where cgiUsualString is used); the
 * argument count is checked afterwards, and then autoXml is run on the
 * two positional arguments. */
{
char *firstArg, *secondArg;

cgiSpoof(&argc, argv);

textField = cgiUsualString("textField", textField);
fileComment = cgiUsualString("comment", fileComment);
picky = cgiBoolean("picky");
makeMain = cgiBoolean("main");
positiveOnly = cgiBoolean("positive");

if (argc != 3)
    {
    usage();
    }
firstArg = argv[1];
secondArg = argv[2];
autoXml(firstArg, secondArg);
return 0;
}
void checkArguments() /** setup our parameters depending on whether we've been called as a cgi script or from the command line */ { hgTest = cgiBoolean("hgTest"); numTests = cgiOptionalInt("numTests",0); origDb = cgiOptionalString("origDb"); origGenome = cgiOptionalString("origGenome"); newGenome = cgiOptionalString("newGenome"); position = cgiOptionalString("position"); chrom = cgiOptionalString("chrom"); chromStart = cgiOptionalInt("chromStart", -1); chromEnd = cgiOptionalInt("chromEnd", -1); calledSelf = cgiBoolean("calledSelf"); /* if we're testing we don't need to worry about UI errors */ if(hgTest) return; /* parse the position string and make sure that it makes sense */ if (position != NULL && position[0] != 0) { parsePosition(cloneString(position), &chrom, &chromStart, &chromEnd); } if (chromStart > chromEnd) { webAbort("Error:", "Start of range is greater than end. %d > %d", chromStart, chromEnd); } /* convert the genomes requested to hgN format */ if(origGenome != NULL) origGenome = ccFreezeDbConversion(NULL, origGenome, organism); if(newGenome != NULL) newGenome = ccFreezeDbConversion(NULL, newGenome, organism); /* make sure that we've got valid arguments */ if((newGenome == NULL || origGenome == NULL || chrom == NULL || chromStart == -1 || chromEnd == -1) && (calledSelf)) webAbort("Error:", "Missing some inputs."); if( origGenome != NULL && sameString(origGenome, newGenome)) { struct dyString *warning = newDyString(1024); dyStringPrintf(warning, "Did you really want to convert from %s to %s (the same genome)?", ccFreezeDbConversion(origGenome, NULL, organism), \ ccFreezeDbConversion(newGenome, NULL, organism)); appendWarningMsg(warning->string); dyStringFree(&warning); } }
void outputSeq(DNA *dna, int dnaSize, boolean hiliteRange, long startRange, long endRange, FILE *out)
/* Write out sequence.
 *
 * The sequence is pushed one character at a time through a dfm formatter
 * set up with initDfm(&dfm, 10, 50, TRUE, hiliteRange, startRange,
 * endRange, out).  If the CGI variable "translate" is set, the DNA is first
 * translated codon by codon with lookupCodon, starting at the optional
 * 1-based CGI offset "utr5", and the protein is written instead; the
 * hilite range is shifted and divided by three to match.  Translation
 * stops at the first codon for which lookupCodon returns 0. */
{
struct dfm dfm;
int i;
char *seq = dna;
int size = dnaSize;

if (cgiBoolean("translate"))
    {
    int utr5 = 0;
    int codingSize;
    int maxProtSize;
    char *prot;

    if (cgiVarExists("utr5"))
        utr5 = cgiInt("utr5")-1;
    /* Keep the offset inside the sequence, so that we neither step back
     * before the start of dna nor start reading beyond its end. */
    if (utr5 < 0)
        utr5 = 0;
    if (utr5 > dnaSize)
        utr5 = dnaSize;

    /* Only the bases after the offset can be translated.  Sizing the loop
     * from the whole of dnaSize made it run past the end of the sequence
     * whenever utr5 was non-zero.  A trailing partial codon is still
     * handed to lookupCodon, exactly as it was before for utr5 == 0. */
    codingSize = dnaSize - utr5;
    maxProtSize = (codingSize+2)/3;

    /* NOTE(review): this buffer is never released by this function. */
    prot = needMem(maxProtSize + 1);

    startRange -= utr5;
    endRange -= utr5;
    startRange /= 3;
    endRange /= 3;
    dna += utr5;
    seq = prot;
    for (size = 0; size < maxProtSize; ++size)
        {
        if ((*prot++ = lookupCodon(dna)) == 0)
            break;
        dna += 3;
        }
    *prot = 0;
    }

initDfm(&dfm, 10, 50, TRUE, hiliteRange, startRange, endRange, out);
for (i=0; i<size; ++i)
    dfmOut(&dfm, seq[i]);
}
void initRegPlotOutput(struct dyString *script, char *fileName)
/* Append the command that opens the plot device for fileName to script:
 * a pdf device when the CGI variable "pdf" is set, otherwise a bitmap
 * device. */
{
if (!cgiBoolean("pdf"))
    {
    dyStringPrintf(script, "bitmap('%s', width=3, height=3, res=200);\n", fileName);
    }
else
    {
    dyStringPrintf(script, "pdf(file='%s', width=6, height=6);\n", fileName);
    }
}
void makePlotLink(char *type, char *fileName)
/* Append html for a plot to the global html string: a link labelled with
 * type when the CGI variable "pdf" is set, otherwise an inline image. */
{
if (!cgiBoolean("pdf"))
    {
    dyStringPrintf(html, "<img src='%s'><br>\n", fileName);
    }
else
    {
    dyStringPrintf(html, "<a href='%s'>%s</a><br>\n", fileName, type);
    }
}
int main(int argc, char *argv[])
/* Process command line.  The exit status is whatever endsInLf returns for
 * the arguments that follow the program name. */
{
int restCount;
char **rest;
int status;

cgiSpoof(&argc, argv);
zeroOk = cgiBoolean("zeroOk");
if (argc < 2)
    {
    usage();
    }

/* Skip argv[0]. */
restCount = argc - 1;
rest = argv + 1;
status = endsInLf(restCount, rest);
return status;
}
int main(int argc, char *argv[])
/* Process command line: set the ver flag, then run agpCloneList on the
 * arguments that follow the program name. */
{
int restCount;
char **rest;

cgiSpoof(&argc, argv);
ver = cgiBoolean("ver");
if (argc < 2)
    {
    usage();
    }

/* Skip argv[0]. */
restCount = argc - 1;
rest = argv + 1;
agpCloneList(restCount, rest);
return 0;
}
int main(int argc, char *argv[])
/* Process command line: set the stretch flag, require exactly three
 * positional arguments, and pass them to uniqSize together with the
 * optional "altFile" value (NULL when absent is presumed - the lookup is
 * cgiOptionalString). */
{
char *altFile;

cgiSpoof(&argc, argv);
stretch = cgiBoolean("stretch");
if (argc != 4)
    {
    usage();
    }
altFile = cgiOptionalString("altFile");
uniqSize(argv[1], argv[2], argv[3], altFile);
return 0;
}
void doMiddle() /* Write middle part of .html. */ { DNA *targetDna; char *chrom; int tStart, tEnd; struct xaAli *xa; int bothStart, bothEnd; char cbCosmidName[256]; char *s; /* Get input variables from CGI. */ char *qOrganism = cgiString("qOrganism"); char *tOrganism = cgiString("tOrganism"); char *query = cgiString("query"); char *target = cgiString("target"); char *strandString = cgiString("strand"); char strand = strandString[0]; boolean showSym = cgiBoolean("symbols"); boolean gotClickPos = cgiVarExists("clickPos"); double clickPos; if (gotClickPos) clickPos = cgiDouble("clickPos"); strcpy(cbCosmidName, query); if ((s = strrchr(cbCosmidName, '.')) != NULL) *s = 0; /* Get xaAli. */ xa = getOneXaAli(qOrganism, query); printf("<H2>Alignment of <I>C. briggsae</I> %s:%d-%d and <I>C. elegans</I> %s</H2>\n", cbCosmidName, xa->qStart, xa->qEnd, target); htmlParagraph("<I>C. briggsae</I> appears on top. Coding regions in <I>C. elegans</I> are in upper case."); /* Get display window. */ if (!wormParseChromRange(target, &chrom, &tStart, &tEnd)) errAbort("Target %s isn't formatted correctly", target); /* Figure out intersection of display window and xeno-alignment */ bothStart = max(xa->tStart, tStart); bothEnd = min(xa->tEnd, tEnd); /* Get upper-cased-exon target DNA. */ targetDna = wormChromPartExonsUpper(chrom, bothStart, bothEnd - bothStart); upcCorresponding(targetDna, bothEnd - bothStart, xa->tSym, bothStart - xa->tStart); printf("<TT><PRE>"); showTargetRange(xa, bothStart - xa->tStart, bothEnd-bothStart, strand, showSym); printf("</TT></PRE>"); }
int main(int argc, char *argv[])
/* Process command line: load the clTest and clDots options, require
 * exactly eight positional arguments, store the last one in proteinDB and
 * pass the first seven to hgRefSeqMrna. */
{
char **pos;

cgiSpoof(&argc, argv);
clTest = cgiBoolean("test");
clDots = cgiOptionalInt("dots", clDots);
if (argc != 9)
    {
    usage();
    }
pos = argv;
proteinDB = pos[8];
hgRefSeqMrna(pos[1], pos[2], pos[3], pos[4], pos[5], pos[6], pos[7]);
return 0;
}
int main(int argc, char *argv[])
/* Process command line.  With nothing but the program name left after
 * cgiSpoof, show usage; otherwise load the suffix and doAll options and
 * run avgTranscriptomeExps. */
{
cgiSpoof(&argc, argv);
if (argc != 1)
    {
    suffix = cgiUsualString("suffix", "pairs.sample.norm");
    doAll = cgiBoolean("doAll");
    avgTranscriptomeExps();
    }
else
    {
    usage();
    }
return 0;
}
boolean bedPassFilters(struct bed *bed, struct altGraphX *ag, int cassetteEdge)
/* Run passFilter on every block of bed whose expIds entry is not
 * cassetteEdge and AND the results together (bitwise, starting from
 * TRUE).  Every such block is checked; there is no early exit on the
 * first failure.  The filter settings come from the CGI variables
 * minFlankingNum (default 2), minFlankingSize (default 0) and
 * mrnaFilter. */
{
int flankNum = cgiUsualInt("minFlankingNum", 2);
int flankSize = cgiUsualInt("minFlankingSize", 0);
boolean useMrna = cgiBoolean("mrnaFilter");
boolean allPassed = TRUE;
int blockIx;

for (blockIx = 0; blockIx < bed->blockCount; blockIx++)
    {
    if (bed->expIds[blockIx] == cassetteEdge)
        continue;
    allPassed &= passFilter(bed, blockIx, ag, flankNum, flankSize, useMrna);
    }
return allPassed;
}
void doMiddle(struct cart *theCart) /* Write header and body of html page. */ { char *userSeq; char *db, *organism; boolean clearUserSeq = cgiBoolean("Clear"); cart = theCart; dnaUtilOpen(); orgChange = sameOk(cgiOptionalString("changeInfo"),"orgChange"); if (orgChange) { cgiVarSet("db", hDefaultDbForGenome(cgiOptionalString("org"))); } getDbAndGenome(cart, &db, &organism, oldVars); char *oldDb = cloneString(db); findClosestServer(&db, &organism); /* Get sequence - from userSeq variable, or if * that is empty from a file. */ if (clearUserSeq) { cartSetString(cart, "userSeq", ""); cartSetString(cart, "seqFile", ""); } userSeq = cartUsualString(cart, "userSeq", ""); if (isEmpty(userSeq)) { userSeq = cartOptionalString(cart, "seqFile"); } if (isEmpty(userSeq) || orgChange) { cartWebStart(theCart, db, "%s BLAT Search", trackHubSkipHubName(organism)); if (differentString(oldDb, db)) printf("<HR><P><EM><B>Note:</B> BLAT search is not available for %s %s; " "defaulting to %s %s</EM></P><HR>\n", hGenome(oldDb), hFreezeDate(oldDb), organism, hFreezeDate(db)); askForSeq(organism,db); cartWebEnd(); } else { blatSeq(skipLeadingSpaces(userSeq), organism); } }
void loadDatabase(char *database, char *track, char *tabName)
/* Load the tab-separated file tabName into table track of database.
 * Unless the CGI variable "add" is set, the table is first remade from
 * createString. */
{
struct sqlConnection *conn = sqlConnect(database);
struct dyString *sql = newDyString(1024);

if (!cgiBoolean("add"))
    {
    /* Start from a fresh table. */
    sqlDyStringPrintf(sql, createString, track);
    sqlRemakeTable(conn, track, sql->string);
    dyStringClear(sql);
    }

sqlDyStringPrintf(sql, "load data local infile '%s' into table %s", tabName, track);
sqlUpdate(conn, sql->string);

dyStringFree(&sql);
sqlDisconnect(&conn);
}
int main(int argc, char *argv[]) { struct altGraphX *agList = NULL; int cassetteCount = 0; float minConfidence = 0; char *bedFileName = NULL; char *faFile = NULL; FILE *faOut = NULL; FILE *bedOut = NULL; boolean mrnaFilter = FALSE; float estPrior = 0.0; int minSize = 0; if(argc < 4) usage(); cgiSpoof(&argc, argv); warn("Loading graphs."); agList = altGraphXLoadAll(argv[1]); bedFileName = cgiOptionalString("bedFile"); minConfidence = cgiDouble("minConf"); db = cgiString("db"); faFile = cgiOptionalString("faFile"); estPrior = cgiOptionalDouble("estPrior", 10); minSize = cgiOptionalInt("minSize", 0); mrnaFilter = cgiBoolean("mrnaFilter"); if(mrnaFilter) loadMrnaHash(); warn("Counting cassette exons from %d clusters above confidence: %f", slCount(agList), minConfidence); if(bedFileName != NULL) { bedOut = mustOpen(bedFileName, "w"); printCommandState(argc, argv, bedOut); fprintf(bedOut, "track name=cass_conf-%4.2f_est-%3.2f description=\"spliceStats minConf=%4.2f estPrior=%3.2f minSize=%d\"\n", minConfidence, estPrior, minConfidence, estPrior, minSize); } if(faFile != NULL) faOut = mustOpen(faFile, "w"); cassetteCount = countCassetteExons(agList, minConfidence, faOut,bedOut ); carefulClose(&faOut); carefulClose(&bedOut); warn("%d cassette exons out of %d clusters in %s", cassetteCount, slCount(agList), argv[1]); altGraphXFreeList(&agList); return 0; }
void submitUrl(struct sqlConnection *conn)
/* Queue a submission job for the manifest named by the CGI variable
 * "url" on behalf of the global userEmail, then show the submission
 * monitor. */
{
/* Pull the URL out of the CGI vars; the only syntax check is that it
 * contains "://". */
char *manifestUrl = trimSpaces(cgiString("url"));
if (!stringIn("://", manifestUrl))
    errAbort("%s doesn't seem to be a valid URL, no '://'", manifestUrl);

/* Reality checks: the user has to be known, and the URL has to open.
 * The descriptor is closed again straight away. */
edwMustGetUserFromEmail(conn, userEmail);
close(netUrlMustOpenPastHeader(manifestUrl));

edwAddSubmitJob(conn, userEmail, manifestUrl, cgiBoolean("update"));

/* Pause briefly so the system can react before status is displayed. */
sleep1000(1000);
monitorSubmission(conn);
}
void doMatrixPlot(struct dyString *script, char *skipPSet, char *tableName, char *type, boolean linesOnly)
/* Print out a matrix plot for a particular table: make a temp file for
 * the plot (.pdf when the CGI variable "pdf" is set, .png otherwise), link
 * it into the page, load the matrix for the current altEvent from
 * tableName and add the plotting commands to script. */
{
struct tempName tn;
struct dyString *sql = newDyString(256);
struct dMatrix *matrix = NULL;
char plotTitle[256];
char *plotSuffix;

safef(plotTitle, sizeof(plotTitle), "%s - %s", altEvent->geneName, type);

plotSuffix = (cgiBoolean("pdf") ? ".pdf" : ".png");
makeTempName(&tn, "sp", plotSuffix);
touchBlank(tn.forCgi);
makePlotLink(type, tn.forCgi);

constructQueryForEvent(sql, skipPSet, tableName);
matrix = dataFromTable(tableName, sql->string);
plotMatrixRows(matrix, script, plotTitle, tn.forCgi, type, linesOnly);
}
void intronSizes(char *database, char *table) /* intronSizes - Output list of intron sizes.. */ { struct dyString *query = newDyString(1024); struct sqlConnection *conn; struct sqlResult *sr; char **row; struct genePred *gp; int rowOffset; struct bed *bedList = NULL, *bed = NULL; hSetDb(database); rowOffset = hOffsetPastBin(NULL, table); conn = hAllocConn(database); sqlDyStringPrintf(query, "select * from %s", table); if (chromName != NULL) dyStringPrintf(query, " where chrom = '%s'", chromName); if (cgiBoolean("withUtr")) { dyStringPrintf(query, " %s txStart != cdsStart", (chromName == NULL ? "where" : "and")); } sr = sqlGetResult(conn, query->string); while ((row = sqlNextRow(sr)) != NULL) { gp = genePredLoad(row+rowOffset); genePredIntrons(gp, &bedList); slReverse(&bedList); for (bed = bedList ; bed != NULL ; bed=bed->next) bedTabOutN(bed,6, stdout); bedFreeList(&bedList); genePredFree(&gp); } sqlFreeResult(&sr); hFreeConn(&conn); }
void intronEnds(char *database, char *table) /* intronEnds - Gather stats on intron ends.. */ { struct dyString *query = newDyString(1024); struct sqlConnection *conn; struct sqlResult *sr; char **row; struct genePred *gp; int total = 0; int gtag = 0; int gcag = 0; int atac = 0; int ctac = 0; DNA ends[4]; int exonIx, txStart; struct dnaSeq *seq; int rowOffset; char strand; rowOffset = hOffsetPastBin(database, NULL, table); conn = hAllocConn(database); sqlDyStringPrintf(query, "select * from %s", table); if (chromName != NULL) dyStringPrintf(query, " where chrom = '%s'", chromName); if (cgiBoolean("withUtr")) { dyStringPrintf(query, " %s txStart != cdsStart", (chromName == NULL ? "where" : "and")); } sr = sqlGetResult(conn, query->string); while ((row = sqlNextRow(sr)) != NULL) { gp = genePredLoad(row+rowOffset); strand = gp->strand[0]; txStart = gp->txStart; seq = hDnaFromSeq(database, gp->chrom, txStart, gp->txEnd, dnaLower); for (exonIx=1; exonIx < gp->exonCount; ++exonIx) { ++total; memcpy(ends, seq->dna + gp->exonEnds[exonIx-1] - txStart, 2); memcpy(ends+2, seq->dna + gp->exonStarts[exonIx] - txStart - 2, 2); if (strand == '-') reverseComplement(ends, 4); if (ends[0] == 'g' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'g') ++gtag; if (ends[0] == 'g' && ends[1] == 'c' && ends[2] == 'a' && ends[3] == 'g') ++gcag; if (ends[0] == 'a' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c') ++atac; if (ends[0] == 'c' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c') ++ctac; } freeDnaSeq(&seq); genePredFree(&gp); } sqlFreeResult(&sr); hFreeConn(&conn); printf("gt/ag %d (%4.2f)\n", gtag, 100.0*gtag/total); printf("gc/ag %d (%4.2f)\n", gcag, 100.0*gcag/total); printf("at/ac %d (%4.2f)\n", atac, 100.0*atac/total); printf("ct/ac %d (%4.2f)\n", ctac, 100.0*ctac/total); printf("Total %d\n", total); }
void doMiddle() { char *seqName; boolean intronsLowerCase = TRUE; boolean intronsParenthesized = FALSE; boolean hiliteNear = FALSE; int startRange = 0; int endRange = 0; boolean gotRange = FALSE; struct dnaSeq *cdnaSeq; boolean isChromRange = FALSE; DNA *dna; char *translation = NULL; seqName = cgiString("geneName"); seqName = trimSpaces(seqName); if (cgiVarExists("intronsLowerCase")) intronsLowerCase = cgiBoolean("intronsLowerCase"); if (cgiVarExists("intronsParenthesized")) intronsParenthesized = cgiBoolean("intronsParenthesized"); if (cgiVarExists("startRange") && cgiVarExists("endRange" )) { startRange = cgiInt("startRange"); endRange = cgiInt("endRange"); gotRange = TRUE; } if (cgiVarExists("hiliteNear")) { hiliteNear = TRUE; } fprintf(stdout, "<P><TT>\n"); /* The logic here is a little complex to optimize speed. * If we can decide what type of thing the name refers to by * simply looking at the name we do. Otherwise we have to * search the database in various ways until we get a hit. 
*/ if (wormIsNamelessCluster(seqName)) { isChromRange = TRUE; } else if (wormIsChromRange(seqName)) { isChromRange = TRUE; } else if (getWormGeneDna(seqName, &dna, TRUE)) { if (cgiBoolean("litLink")) { char nameBuf[64]; char *geneName = NULL; char *productName = NULL; char *coding; int transSize; struct wormCdnaInfo info; printf("<H3>Information and Links for %s</H3>\n", seqName); if (wormInfoForGene(seqName, &info)) { if (info.description) printf("<P>%s</P>\n", info.description); geneName = info.gene; productName = info.product; } else { if (wormIsGeneName(seqName)) geneName = seqName; else if (wormGeneForOrf(seqName, nameBuf, sizeof(nameBuf))) geneName = nameBuf; } coding = cloneUpperOnly(dna); transSize = 1 + (strlen(coding)+2)/3; translation = needMem(1+strlen(coding)/3); dnaTranslateSome(coding, translation, transSize); freez(&coding); if (geneName) { printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?form=4&db=m" "&term=C+elegans+%s&dispmax=50&relentrezdate=No+Limit\">", geneName); printf("PubMed search on gene: </A>%s<BR>\n", geneName); } if (productName) { char *encoded = cgiEncode(productName); printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?form=4&db=m" "&term=%s&dispmax=50&relentrezdate=No+Limit\">", encoded); printf("PubMed search on product:</A> %s<BR>\n", productName); freeMem(encoded); } /* Process name to get rid of isoform letter for Proteome. */ if (geneName) strcpy(nameBuf, geneName); else { strcpy(nameBuf, seqName); #ifdef NEVER /* Sometimes Proteome requires the letter after the orf name * in alt-spliced cases, sometimes it can't handle it.... 
*/ nameLen = strlen(nameBuf); if (wormIsOrfName(nameBuf) && isalpha(nameBuf[nameLen-1])) { char *dotPos = strrchr(nameBuf, '.'); if (dotPos != NULL && isdigit(dotPos[1])) nameBuf[nameLen-1] = 0; } #endif /* NEVER */ } printf("<A HREF=\"http://www.wormbase.org/db/seq/sequence?name=%s;class=Sequence\">", seqName); printf("WormBase link on:</A> %s<BR>\n", seqName); printf("<A HREF=\"http://www.proteome.com/databases/WormPD/reports/%s.html\">", nameBuf); printf("Proteome link on:</A> %s<BR>\n<BR>\n", nameBuf); printf("<A HREF=#DNA>Genomic DNA Sequence</A><BR>\n"); if (hiliteNear) printf("<A HREF=\"#CLICKED\">Shortcut to where you clicked in gene</A><BR>"); printf("<A HREF=#protein>Translated Protein Sequence</A><BR>\n"); htmlHorizontalLine(); printf("<A NAME=DNA></A>"); printf("<H3>%s Genomic DNA sequence</H3>", seqName); } if (!intronsLowerCase) tolowers(dna); if (hiliteNear) { if (!gotRange) { double nearPos = cgiDouble("hiliteNear"); int rad = 5; int dnaSize = strlen(dna); long mid = (int)(dnaSize * nearPos); startRange = mid - rad; if (startRange < 0) startRange = 0; endRange = mid + rad; if (endRange >= dnaSize) endRange = dnaSize - 1; } } outputSeq(dna, strlen(dna), hiliteNear, startRange, endRange, stdout); freez(&dna); } else if (wormCdnaSeq(seqName, &cdnaSeq, NULL)) { outputSeq(cdnaSeq->dna, cdnaSeq->size, FALSE, 0, 0, stdout); } else { isChromRange = TRUE; } if (isChromRange) { char *chromId; int start, end; char strand = '+'; int size; if (!wormGeneRange(seqName, &chromId, &strand, &start, &end)) errAbort("Can't find %s",seqName); size = end - start; if (intronsLowerCase) dna = wormChromPartExonsUpper(chromId, start, size); else { dna = wormChromPart(chromId, start, size); touppers(dna); } if (cgiVarExists("strand")) strand = cgiString("strand")[0]; if (strand == '-') reverseComplement(dna, size); outputSeq(dna, size, FALSE, 0, 0, stdout); } if (translation != NULL) { htmlHorizontalLine(); printf("<A NAME=protein></A>"); printf("<H3>Translated Protein of 
%s</H3>\n", seqName); outputSeq(translation, strlen(translation), FALSE, 0, 0, stdout); freez(&translation); } fprintf(stdout, "</TT></P>\n"); }
void faNcbiToUcsc(char *inFile, char *out) /* faNcbiToUcsc - Convert FA file from NCBI to UCSC format.. */ { struct lineFile *lf = lineFileOpen(inFile, TRUE); char outName[512]; char *line; boolean split = cgiBoolean("split"); boolean ntLast = cgiBoolean("ntLast"); boolean encode = cgiBoolean("encode"); struct dnaSeq seq; FILE *f = NULL; char *wordBefore = cgiUsualString("wordBefore", "gb"); int wordIx = cgiUsualInt("wordIx", -1); char *e = NULL; char *nt = NULL; ZeroVar(&seq); if (split) makeDir(out); else f = mustOpen(out, "w"); while (lineFileNext(lf, &line, NULL)) { if (line[0] == '>') { if (ntLast || encode) { nt = NULL; if (ntLast) { e = NULL; nt = stringIn("NT_", line); if (nt == NULL) nt = stringIn("NG_", line); if (nt == NULL) nt = stringIn("NC_", line); if (nt == NULL) errAbort("Expecting NT_ NG_ or NC_in '%s'", line); e = strchr(nt, '|'); if (e != NULL) *e = 0; e = strchr(nt, ' '); if (e != NULL) *e = 0; } else { nt = stringIn("|EN", line); if (nt == NULL) errAbort("Expecting EN in %s", line); nt++; nt = firstWordInLine(nt); } if (split) { sprintf(outName, "%s/%s.fa", out, nt); carefulClose(&f); f = mustOpen(outName, "w"); } fprintf(f, ">%s\n", nt); } else { char *words[32]; int wordCount, i; char *accession = NULL; wordCount = chopString(line+1, "|", words, ArraySize(words)); if (wordIx >= 0) { if (wordIx >= wordCount) errAbort("Sorry only %d words", wordCount); accession = words[wordIx]; } else { for (i=0; i<wordCount-1; ++i) { if (sameString(words[i], wordBefore)) { accession = words[i+1]; break; } } if (accession == NULL) errAbort("Couldn't find '%s' line %d of %s", wordBefore, lf->lineIx, lf->fileName); } chopSuffix(accession); fprintf(f, ">%s\n", accession); } } else { fprintf(f, "%s\n", line); } } }
static void hgSeqConcatRegionsDb(char *db, char *chrom, int chromSize, char strand, char *name,
                                 int rCount, unsigned *rStarts, unsigned *rSizes,
                                 boolean *exonFlags, boolean *cdsFlags)
/* Concatenate and print out dna for a series of regions.
 *
 * The span from the start of the first region to the end of the last one
 * (plus any padding) is fetched once; the regions are then copied out of
 * it back to back and written to stdout as a single fasta record.
 * Casing, repeat masking, padding and reverse-complementing are controlled
 * by the hgSeq.* CGI variables.
 *
 * Historical note: this code used to generate different sequence ids when
 * the global database in hdb differed from the db parameter.  That was
 * dropped when the global database was removed, as it did not appear to
 * be used. */
{
struct dnaSeq *spanSeq = NULL;     /* Everything from seqStart to seqEnd. */
struct dnaSeq *joined = NULL;      /* The concatenated regions. */
char recName[256];
int seqStart, seqEnd;
int offset, cSize;
int ix, lastIx;
boolean isRc     = (strand == '-') || cgiBoolean("hgSeq.revComp");
boolean maskRep  = cgiBoolean("hgSeq.maskRepeats");
int padding5     = cgiOptionalInt("hgSeq.padding5", 0);
int padding3     = cgiOptionalInt("hgSeq.padding3", 0);
char *casing     = cgiString("hgSeq.casing");
char *repMasking = cgiString("hgSeq.repMasking");
char *granularity = cgiOptionalString("hgSeq.granularity");
boolean concatRegions = granularity && sameString("gene", granularity);
/* In chromosome coordinates the padding in front of the first region is
 * the 3' padding when reverse-complementing and the 5' padding otherwise;
 * the padding behind the last region is the other one. */
int *padBefore = (isRc ? &padding3 : &padding5);
int *padAfter  = (isRc ? &padding5 : &padding3);
boolean upperExons, upperCds;

if (rCount < 1)
    return;

/* Don't support padding if granularity is gene (i.e. concat'ing all). */
if (concatRegions)
    {
    padding5 = 0;
    padding3 = 0;
    }

lastIx = rCount - 1;
seqStart = rStarts[0] - *padBefore;
seqEnd   = rStarts[lastIx] + rSizes[lastIx] + *padAfter;

/* Padding might push us off the edge of the chrom; if so, truncate
 * (seqStart is negative here, so this shrinks the padding). */
if (seqStart < 0)
    {
    *padBefore += seqStart;
    seqStart = 0;
    }
/* If we know the chromSize, don't pad out beyond it. */
if ((chromSize > 0) && (seqEnd > chromSize))
    {
    *padAfter += (chromSize - seqEnd);
    seqEnd = chromSize;
    }
if (seqEnd <= seqStart)
    {
    printf("# Null range for %s_%s (range=%s:%d-%d 5'pad=%d 3'pad=%d) (may indicate a query-side insert)\n",
           db, name, chrom, seqStart+1, seqEnd, padding5, padding3);
    return;
    }

/* Fetch the whole span in the base casing. */
if (!maskRep)
    {
    if (sameString(casing, "upper"))
        spanSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaUpper);
    else
        spanSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaLower);
    }
else
    {
    spanSeq = hDnaFromSeq(db, chrom, seqStart, seqEnd, dnaMixed);
    if (sameString(repMasking, "N"))
        lowerToN(spanSeq->dna, strlen(spanSeq->dna));
    if (!sameString(casing, "upper"))
        tolowers(spanSeq->dna);
    }

/* Handle casing and compute size of concatenated sequence. */
upperExons = sameString(casing, "exon");
upperCds = sameString(casing, "cds");
cSize = 0;
for (ix = 0; ix < rCount; ix++)
    {
    if ((upperExons && exonFlags[ix]) || (upperCds && cdsFlags[ix]))
        {
        int rStart = rStarts[ix] - seqStart;
        toUpperN(spanSeq->dna+rStart, rSizes[ix]);
        }
    cSize += rSizes[ix];
    }
cSize += (padding5 + padding3);

AllocVar(joined);
joined->dna = needLargeMem(cSize+1);
joined->size = cSize;

/* Copy the regions end to end; the first and last ones take their
 * padding along. */
offset = 0;
for (ix = 0; ix < rCount; ix++)
    {
    int start = rStarts[ix] - seqStart;
    int size  = rSizes[ix];
    if (ix == 0)
        {
        start -= *padBefore;
        assert(start == 0);
        size += *padBefore;
        }
    if (ix == rCount-1)
        {
        size += *padAfter;
        }
    memcpy(joined->dna+offset, spanSeq->dna+start, size);
    offset += size;
    }
assert(offset == joined->size);
joined->dna[offset] = 0;
freeDnaSeq(&spanSeq);

if (isRc)
    reverseComplement(joined->dna, joined->size);

safef(recName, sizeof(recName),
      "%s_%s range=%s:%d-%d 5'pad=%d 3'pad=%d "
      "strand=%c repeatMasking=%s",
      db, name, chrom, seqStart+1, seqEnd, padding5, padding3,
      (isRc ? '-' : '+'),
      (maskRep ? repMasking : "none"));
faWriteNext(stdout, recName, joined->dna, joined->size);
freeDnaSeq(&joined);
}
void blatSeq(char *userSeq, char *organism) /* Blat sequence user pasted in. */ { FILE *f; struct dnaSeq *seqList = NULL, *seq; struct tempName pslTn, faTn; int maxSingleSize, maxTotalSize, maxSeqCount; int minSingleSize = minMatchShown; char *genome, *db; char *type = cgiString("type"); char *seqLetters = cloneString(userSeq); struct serverTable *serve; int conn; int oneSize, totalSize = 0, seqCount = 0; boolean isTx = FALSE; boolean isTxTx = FALSE; boolean txTxBoth = FALSE; struct gfOutput *gvo; boolean qIsProt = FALSE; enum gfType qType, tType; struct hash *tFileCache = gfFileCacheNew(); boolean feelingLucky = cgiBoolean("Lucky"); getDbAndGenome(cart, &db, &genome, oldVars); if(!feelingLucky) cartWebStart(cart, db, "%s BLAT Results", trackHubSkipHubName(organism)); /* Load user sequence and figure out if it is DNA or protein. */ if (sameWord(type, "DNA")) { seqList = faSeqListFromMemText(seqLetters, TRUE); uToT(seqList); isTx = FALSE; } else if (sameWord(type, "translated RNA") || sameWord(type, "translated DNA")) { seqList = faSeqListFromMemText(seqLetters, TRUE); uToT(seqList); isTx = TRUE; isTxTx = TRUE; txTxBoth = sameWord(type, "translated DNA"); } else if (sameWord(type, "protein")) { seqList = faSeqListFromMemText(seqLetters, FALSE); isTx = TRUE; qIsProt = TRUE; } else { seqList = faSeqListFromMemTextRaw(seqLetters); isTx = !seqIsDna(seqList); if (!isTx) { for (seq = seqList; seq != NULL; seq = seq->next) { seq->size = dnaFilteredSize(seq->dna); dnaFilter(seq->dna, seq->dna); toLowerN(seq->dna, seq->size); subChar(seq->dna, 'u', 't'); } } else { for (seq = seqList; seq != NULL; seq = seq->next) { seq->size = aaFilteredSize(seq->dna); aaFilter(seq->dna, seq->dna); toUpperN(seq->dna, seq->size); } qIsProt = TRUE; } } if (seqList != NULL && seqList->name[0] == 0) { freeMem(seqList->name); seqList->name = cloneString("YourSeq"); } trimUniq(seqList); /* If feeling lucky only do the first on. 
*/ if(feelingLucky && seqList != NULL) { seqList->next = NULL; } /* Figure out size allowed. */ maxSingleSize = (isTx ? 10000 : 75000); maxTotalSize = maxSingleSize * 2.5; #ifdef LOWELAB maxSeqCount = 200; #else maxSeqCount = 25; #endif /* Create temporary file to store sequence. */ trashDirFile(&faTn, "hgSs", "hgSs", ".fa"); faWriteAll(faTn.forCgi, seqList); /* Create a temporary .psl file with the alignments against genome. */ trashDirFile(&pslTn, "hgSs", "hgSs", ".pslx"); f = mustOpen(pslTn.forCgi, "w"); gvo = gfOutputPsl(0, qIsProt, FALSE, f, FALSE, TRUE); serve = findServer(db, isTx); /* Write header for extended (possibly protein) psl file. */ if (isTx) { if (isTxTx) { qType = gftDnaX; tType = gftDnaX; } else { qType = gftProt; tType = gftDnaX; } } else { qType = gftDna; tType = gftDna; } pslxWriteHead(f, qType, tType); if (qType == gftProt) { minSingleSize = 14; } else if (qType == gftDnaX) { minSingleSize = 36; } /* Loop through each sequence. */ for (seq = seqList; seq != NULL; seq = seq->next) { printf(" "); fflush(stdout); /* prevent apache cgi timeout by outputting something */ oneSize = realSeqSize(seq, !isTx); if ((seqCount&1) == 0) // Call bot delay every 2nd time starting with first time hgBotDelay(); if (++seqCount > maxSeqCount) { warn("More than 25 input sequences, stopping at %s.", seq->name); break; } if (oneSize > maxSingleSize) { warn("Sequence %s is %d letters long (max is %d), skipping", seq->name, oneSize, maxSingleSize); continue; } if (oneSize < minSingleSize) { warn("Warning: Sequence %s is only %d letters long (%d is the recommended minimum)", seq->name, oneSize, minSingleSize); // we could use "continue;" here to actually enforce skipping, // but let's give the short sequence a chance, it might work. // minimum possible length = tileSize+stepSize, so mpl=16 for dna stepSize=5, mpl=10 for protein. 
if (qIsProt && oneSize < 1) // protein does not tolerate oneSize==0 continue; } totalSize += oneSize; if (totalSize > maxTotalSize) { warn("Sequence %s would take us over the %d letter limit, stopping here.", seq->name, maxTotalSize); break; } conn = gfConnect(serve->host, serve->port); if (isTx) { gvo->reportTargetStrand = TRUE; if (isTxTx) { gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, tFileCache, gvo, !txTxBoth); if (txTxBoth) { reverseComplement(seq->dna, seq->size); conn = gfConnect(serve->host, serve->port); gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, tFileCache, gvo, FALSE); } } else { gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo); } } else { gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo); reverseComplement(seq->dna, seq->size); conn = gfConnect(serve->host, serve->port); gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo); } gfOutputQuery(gvo, f); } carefulClose(&f); showAliPlaces(pslTn.forCgi, faTn.forCgi, serve->db, qType, tType, organism, feelingLucky); if(!feelingLucky) cartWebEnd(); gfFileCacheFree(&tFileCache); }
int hgSeqBed(char *db, struct hTableInfo *hti, struct bed *bedList) /* Print out dna sequence from the given database of all items in bedList. * hti describes the bed-compatibility level of bedList items. * Returns number of FASTA records printed out. */ { struct bed *bedItem; char itemName[128]; boolean isRc; int count; unsigned *starts = NULL; unsigned *sizes = NULL; boolean *exonFlags = NULL; boolean *cdsFlags = NULL; int i, rowCount, totalCount; boolean promoter = cgiBoolean("hgSeq.promoter"); boolean intron = cgiBoolean("hgSeq.intron"); boolean utrExon5 = cgiBoolean("hgSeq.utrExon5"); boolean utrIntron5 = utrExon5 && intron; boolean cdsExon = cgiBoolean("hgSeq.cdsExon"); boolean cdsIntron = cdsExon && intron; boolean utrExon3 = cgiBoolean("hgSeq.utrExon3"); boolean utrIntron3 = utrExon3 && intron; boolean downstream = cgiBoolean("hgSeq.downstream"); int promoterSize = cgiOptionalInt("hgSeq.promoterSize", 0); int downstreamSize = cgiOptionalInt("hgSeq.downstreamSize", 0); char *granularity = cgiOptionalString("hgSeq.granularity"); boolean concatRegions = granularity && sameString("gene", granularity); boolean concatAdjacent = (cgiBooleanDefined("hgSeq.splitCDSUTR") && (! cgiBoolean("hgSeq.splitCDSUTR"))); boolean isCDS, doIntron; boolean canDoUTR, canDoIntrons; /* catch a special case: introns selected, but no exons -> include all introns * instead of qualifying intron with exon flags. */ if (intron && !(utrExon5 || cdsExon || utrExon3)) { utrIntron5 = cdsIntron = utrIntron3 = TRUE; } canDoUTR = hti->hasCDS; canDoIntrons = hti->hasBlocks; rowCount = totalCount = 0; for (bedItem = bedList; bedItem != NULL; bedItem = bedItem->next) { if (bedItem->blockCount == 0) /* An intersection may have made hti unreliable. 
*/ canDoIntrons = FALSE; rowCount++; int chromSize = hgSeqChromSize(db, bedItem->chrom); // bed: translate relative starts to absolute starts for (i=0; i < bedItem->blockCount; i++) { bedItem->chromStarts[i] += bedItem->chromStart; } isRc = (bedItem->strand[0] == '-'); // here's the max # of feature regions: if (canDoIntrons) count = 4 + (2 * bedItem->blockCount); else count = 5; maxStartsOffset = count-1; starts = needMem(sizeof(unsigned) * count); sizes = needMem(sizeof(unsigned) * count); exonFlags = needMem(sizeof(boolean) * count); cdsFlags = needMem(sizeof(boolean) * count); // build up a list of selected regions count = 0; if (!isRc && promoter && (promoterSize > 0)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, (bedItem->chromStart - promoterSize), promoterSize, FALSE, FALSE, chromSize); } else if (isRc && downstream && (downstreamSize > 0)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, (bedItem->chromStart - downstreamSize), downstreamSize, FALSE, FALSE, chromSize); } if (canDoIntrons && canDoUTR) { for (i=0; i < bedItem->blockCount; i++) { if ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) <= bedItem->thickStart) { if ((!isRc && utrExon5) || (isRc && utrExon3)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromStarts[i], bedItem->blockSizes[i], TRUE, FALSE, chromSize); } if (((!isRc && utrIntron5) || (isRc && utrIntron3)) && (i < bedItem->blockCount - 1)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, (bedItem->chromStarts[i] + bedItem->blockSizes[i]), (bedItem->chromStarts[i+1] - bedItem->chromStarts[i] - bedItem->blockSizes[i]), FALSE, FALSE, chromSize); } } else if (bedItem->chromStarts[i] < bedItem->thickEnd) { if ((bedItem->chromStarts[i] < bedItem->thickStart) && ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) > bedItem->thickEnd)) { if ((!isRc && utrExon5) || (isRc && utrExon3)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromStarts[i], (bedItem->thickStart - 
bedItem->chromStarts[i]), TRUE, FALSE, chromSize); } if (cdsExon) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->thickStart, (bedItem->thickEnd - bedItem->thickStart), TRUE, TRUE, chromSize); } if ((!isRc && utrExon3) || (isRc && utrExon5)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->thickEnd, (bedItem->chromStarts[i] + bedItem->blockSizes[i] - bedItem->thickEnd), TRUE, FALSE, chromSize); } } else if (bedItem->chromStarts[i] < bedItem->thickStart) { if ((!isRc && utrExon5) || (isRc && utrExon3)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromStarts[i], (bedItem->thickStart - bedItem->chromStarts[i]), TRUE, FALSE, chromSize); } if (cdsExon) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->thickStart, (bedItem->chromStarts[i] + bedItem->blockSizes[i] - bedItem->thickStart), TRUE, TRUE, chromSize); } } else if ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) > bedItem->thickEnd) { if (cdsExon) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromStarts[i], (bedItem->thickEnd - bedItem->chromStarts[i]), TRUE, TRUE, chromSize); } if ((!isRc && utrExon3) || (isRc && utrExon5)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->thickEnd, (bedItem->chromStarts[i] + bedItem->blockSizes[i] - bedItem->thickEnd), TRUE, FALSE, chromSize); } } else if (cdsExon) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromStarts[i], bedItem->blockSizes[i], TRUE, TRUE, chromSize); } isCDS = ! ((bedItem->chromStarts[i] + bedItem->blockSizes[i]) > bedItem->thickEnd); doIntron = (isCDS ? cdsIntron : ((!isRc) ? 
utrIntron3 : utrIntron5)); if (doIntron && (i < bedItem->blockCount - 1)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, (bedItem->chromStarts[i] + bedItem->blockSizes[i]), (bedItem->chromStarts[i+1] - bedItem->chromStarts[i] - bedItem->blockSizes[i]), FALSE, isCDS, chromSize); } } else { if ((!isRc && utrExon3) || (isRc && utrExon5)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromStarts[i], bedItem->blockSizes[i], TRUE, FALSE, chromSize); } if (((!isRc && utrIntron3) || (isRc && utrIntron5)) && (i < bedItem->blockCount - 1)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, (bedItem->chromStarts[i] + bedItem->blockSizes[i]), (bedItem->chromStarts[i+1] - bedItem->chromStarts[i] - bedItem->blockSizes[i]), FALSE, FALSE, chromSize); } } } } else if (canDoIntrons) { for (i=0; i < bedItem->blockCount; i++) { if (cdsExon) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromStarts[i], bedItem->blockSizes[i], TRUE, FALSE, chromSize); } if (cdsIntron && (i < bedItem->blockCount - 1)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, (bedItem->chromStarts[i] + bedItem->blockSizes[i]), (bedItem->chromStarts[i+1] - bedItem->chromStarts[i] - bedItem->blockSizes[i]), FALSE, FALSE, chromSize); } } } else if (canDoUTR) { if (bedItem->thickStart == 0 && bedItem->thickEnd == 0) bedItem->thickStart = bedItem->thickEnd = bedItem->chromStart; if ((!isRc && utrExon5) || (isRc && utrExon3)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromStart, (bedItem->thickStart - bedItem->chromStart), TRUE, FALSE, chromSize); } if (cdsExon) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->thickStart, (bedItem->thickEnd - bedItem->thickStart), TRUE, TRUE, chromSize); } if ((!isRc && utrExon3) || (isRc && utrExon5)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->thickEnd, (bedItem->chromEnd - bedItem->thickEnd), TRUE, FALSE, chromSize); } } else { addFeature(&count, starts, 
sizes, exonFlags, cdsFlags, bedItem->chromStart, (bedItem->chromEnd - bedItem->chromStart), TRUE, FALSE, chromSize); } if (!isRc && downstream && (downstreamSize > 0)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromEnd, downstreamSize, FALSE, FALSE, chromSize); } else if (isRc && promoter && (promoterSize > 0)) { addFeature(&count, starts, sizes, exonFlags, cdsFlags, bedItem->chromEnd, promoterSize, FALSE, FALSE, chromSize); } snprintf(itemName, sizeof(itemName), "%s_%s", hti->rootName, bedItem->name); hgSeqRegionsAdjDb(db, bedItem->chrom, chromSize, bedItem->strand[0], itemName, concatRegions, concatAdjacent, count, starts, sizes, exonFlags, cdsFlags); totalCount += count; freeMem(starts); freeMem(sizes); freeMem(exonFlags); freeMem(cdsFlags); } return totalCount; }
void doRegressionPlot(struct dyString *script, char *skipPset, char *incTable, char *skipTable, char *geneTable) /* Put up a regression plot. */ { struct dMatrix *skip = NULL, *inc = NULL, *gene = NULL; char query[256]; int i = 0; struct tempName regPlot; double thresh = .75; if(cgiBoolean("allPoints")) thresh = -1; if(cgiBoolean("pdf")) makeTempName(®Plot, "sp", ".pdf"); else makeTempName(®Plot, "sp", ".png"); touchBlank(regPlot.forCgi); sqlSafef(query, sizeof(query), "select * from %s where name like '%%%s%%';", incTable, skipPset); inc = dataFromTable(incTable, query); sqlSafef(query, sizeof(query), "select * from %s where name like '%%%s%%';", skipTable, skipPset); skip = dataFromTable(skipTable, query); sqlSafef(query, sizeof(query), "select * from %s where name like '%%%s%%';", geneTable, skipPset); gene = dataFromTable(geneTable, query); initRegPlotOutput(script, regPlot.forCgi); dyStringPrintf(script, "incDat = c("); for(i = 0; i< inc->colCount; i++) dyStringPrintf(script, "%.4f,", inc->matrix[0][i]); dyStringPrintf(script, ");\n"); dyStringPrintf(script, "skipDat = c("); for(i = 0; i< skip->colCount; i++) dyStringPrintf(script, "%.4f,", skip->matrix[0][i]); dyStringPrintf(script, ");\n"); dyStringPrintf(script, "geneDat = c("); for(i = 0; i< gene->colCount; i++) dyStringPrintf(script, "%.4f,", gene->matrix[0][i]); dyStringPrintf(script, ");\n"); dyStringPrintf(script, "expressed = geneDat > %.4f;\n", thresh); dyStringPrintf(script, abbv); if(cgiBoolean("muscle")) { dyStringPrintf(script, isMuscle); dyStringPrintf(script, plotRegression, altEvent->geneName, "Muscle", "Muscle"); } else { dyStringPrintf(script, isBrain); dyStringPrintf(script, plotRegression, altEvent->geneName, "Brain", "Brain"); } closePlotOutput(script); makePlotLink("Include vs. 
Skip", regPlot.forCgi); dyStringPrintf(html, "<table width=600><tr><td>\n"); dyStringPrintf(html, "Tissues for which the gene expression was too low to be considered are in lighter colors.<br><br>\n"); dyStringPrintf(html, "%s\n", key); dyStringPrintf(html, "</td></tr></table><br>\n"); //dyStringPrintf(html, "<img src='%s'><br>\n", regPlot.forCgi); }