void makeHtml(char *fileName)
/* Make html page with links for Tom.
 *
 * Writes one preformatted line to fileName for every entry of roughList
 * (declared outside this function).  Each line holds:
 *   - a link to the NCBI OMIM page for the entry's omimId,
 *   - a link to the UCSC genome browser for the chromosome range that
 *     bandRange() reports for the entry's start/end bands,
 *   - the start and end band names,
 *   - the entry's description.
 *
 * fileName: path of the HTML file to create (opened with mustOpen in "w" mode). */
{
    struct tomRough *tr;
    int start, end;
    char chrom[256];
    FILE *f = mustOpen(fileName, "w");

    htmStart(f, "roughly mapped disease genes");
    fprintf(f, "<PRE>");
    for (tr = roughList; tr != NULL; tr = tr->next) {
        /* Bounded formatting: the chromosome name comes from list data, so
         * never let it write past the end of the local buffer. */
        snprintf(chrom, sizeof(chrom), "chr%s", tr->chromosome);
        bandRange(chrom, tr->startBand, tr->endBand, &start, &end);

        fprintf(f, "<A HREF=\"http://www.ncbi.nlm.nih.gov/entrez/dispomim.cgi?id=%d\">",
                tr->omimId);
        fprintf(f, "OMIM %d</A>\t", tr->omimId);

        fprintf(f, "<A HREF=\"http://genome.ucsc.edu/cgi-bin/hgTracks?position=%s:%d-%d&pix=800\">",
                chrom, start, end);
        fprintf(f, "%s:%d-%d</A>\t", chrom, start, end);

        fprintf(f, "%s,%s\t", tr->startBand, tr->endBand);
        fprintf(f, "%s\n", tr->description);
    }
    htmEnd(f);
    fclose(f);
}
int main(int argc, char *argv[]) { char *inName, *outName; FILE *in, *out; char line[256]; struct ernaHit *list = NULL, *el; struct ernaClump *clumpList, *clump; int clumpCount = 0; if (argc != 3) { errAbort("sorterna - Process raw editRna file into a nice html.\n" "usage: sorterna editrna.raw editrna.html"); } inName = argv[1]; outName = argv[2]; in = mustOpen(inName, "r"); out = mustOpen(outName, "w"); /* Read in whole file. */ while (fgets(line, sizeof(line), in)) { el = parseHit(line); if (el->totalCdna >= el->commonErr + 2) slAddHead(&list, el); } fclose(in); printf("Got %d raw elements\n", slCount(list)); clumpList = clumpHits(list); printf("Merged into %d clumps\n", slCount(clumpList)); slSort(&clumpList, cmpScore); htmStart(out, "RNA Editing Candidates"); fprintf(out, "<H2>RNA Editing Candidates</H2>\n"); fprintf(out, "<TT><PRE>"); for (clump = clumpList; clump != NULL; clump = clump->next) { ++clumpCount; printf("Analysing clump %d\n", clumpCount); fprintf(out, "<P><HR ALIGN=CENTER></P>"); fprintf(out, "<A HREF=\"../cgi-bin/tracks.exe?where=%s:%d-%d&hilite=%d-%d\">%s:%d-%d</A> ", clump->chrom, clump->start-1000, clump->end+1000, clump->start, clump->end+1, clump->chrom, clump->start, clump->end+1); fprintf(out, " score %f elements %d\n", clump->score, slCount(clump->hits)); showClump(clump, out); } htmEnd(out); return 0; }
void outputPicks(struct scoredWindow *winList, char *database, struct hash *chromLimitHash, struct stats *stats, FILE *f) /* Output picked regions. */ { struct scoredWindow *strata[strataCount][strataCount]; double geneCuts[strataCount], consCuts[strataCount]; struct scoredWindow *win, *next; int geneIx, consIx, pickIx; FILE *html = NULL; struct region *avoidList = NULL, *avoid; /* Get list of regions to avoid */ if (avoidFile != NULL) avoidList = loadRegionFile(database, avoidFile); if (htmlOutput != NULL) { html = mustOpen(htmlOutput, "w"); htmStart(html, "Random Regions for 1% Project"); } fprintf(f, "Cuts at:\n"); calcCuts(stats->consCounts, histSize, stats->totalConsCount, 1.0, consCuts, strataCount); uglyf("cons %f %f %f\n", consCuts[0], consCuts[1], consCuts[2]); fprintf(f, "cons %f %f %f\n", consCuts[0], consCuts[1], consCuts[2]); calcCuts(stats->geneCounts, histSize, stats->totalGeneCount, 1.0/geneScale, geneCuts, strataCount); uglyf("gene %f %f %f\n", geneCuts[0], geneCuts[1], geneCuts[2]); uglyf("gene %f %f %f\n", geneCuts[0], geneCuts[1], geneCuts[2]); fprintf(f, "\n"); /* Move winList to strata. */ zeroBytes(strata, sizeof(strata)); for (win = winList; win != NULL; win = next) { /* Calculate appropriate strata and move. */ next = win->next; consIx = cutIx(consCuts, strataCount, win->consRatio); geneIx = cutIx(geneCuts, strataCount, win->geneRatio); slAddHead(&strata[consIx][geneIx], win); } /* Shuffle strata and output first picks in each. 
*/ srand(randSeed); for (consIx=strataCount-1; consIx>=0; --consIx) { for (geneIx=strataCount-1; geneIx>=0; --geneIx) { int cs=0, gs=0; if (geneIx>0) gs = round(100*genoCuts[geneIx-1]); if (consIx>0) cs = round(100*genoCuts[consIx-1]); fprintf(f, "consNonTx %d%%-%d%%, gene %d%%-%d%%\n", cs, round(100*genoCuts[consIx]), gs, round(100*genoCuts[geneIx])); if (html) { fprintf(html, "<H2>consNonTx %d%% - %d%%, gene %d%% - %d%%</H3>\n", cs, round(100*genoCuts[consIx]), gs, round(100*genoCuts[geneIx])); } shuffleList(&strata[consIx][geneIx]); pickIx = 0; for (win = strata[consIx][geneIx]; win != NULL; win = win->next) { int end = win->start + bigWinSize; if (!hitsRegions(win->chrom, win->start, end, avoidList)) { if (withinChromLimits(chromLimitHash, win->chrom)) { AllocVar(avoid); avoid->chrom = cloneString(win->chrom); avoid->start = win->start; avoid->end = end; slAddHead(&avoidList, avoid); fprintf(f, "%s:%d-%d\t", win->chrom, win->start+1, end); fprintf(f, "consNonTx %4.1f%%, gene %4.1f%%\n", 100*win->consRatio, 100*win->geneRatio); if (html) { fprintf(html, "<A HREF=\"http://genome.ucsc.edu/cgi-bin/"); fprintf(html, "hgTracks?db=%s&position=%s:%d-%d\">", database, win->chrom, win->start+1, end); fprintf(html, "%s:%d-%d</A>", win->chrom, win->start+1, end); fprintf(html, "\tconsNonTx %4.1f%%, gene %4.1f%%<BR>\n", 100*win->consRatio, 100*win->geneRatio); } if (++pickIx >= picksPer) break; } } } fprintf(f, "\n"); } } if (html) { htmEnd(html); carefulClose(&html); } }
int main(int argc, char *argv[]) { char *genoListName; char *cdnaListName; char *oocFileName; char *pairFileName; struct patSpace *patSpace; long startTime, endTime; char **genoList; int genoListSize; char *genoListBuf; char **cdnaList; int cdnaListSize; char *cdnaListBuf; char *genoName; int i; int estIx = 0; struct dnaSeq **seqListList = NULL, *seq; static char hitFileName[512], mergerFileName[512], okFileName[512]; char *outRoot; struct hash *pairHash; if (dumpMe) { bigHtmlFile = mustOpen("C:\\inetpub\\wwwroot\\test\\patAli.html", "w"); littleHtmlFile = mustOpen("C:\\inetpub\\wwwroot\\test\\patSpace.html", "w"); htmStart(bigHtmlFile, "PatSpace Alignments"); htmStart(littleHtmlFile, "PatSpace Index"); } if ((hostName = getenv("HOST")) == NULL) hostName = ""; if (argc != 6) usage(); pushWarnHandler(patSpaceWarnHandler); startTime = clock1000(); dnaUtilOpen(); makePolys(); genoListName = argv[1]; cdnaListName = argv[2]; oocFileName = argv[3]; pairFileName = argv[4]; outRoot = argv[5]; sprintf(hitFileName, "%s.hit", outRoot); sprintf(mergerFileName, "%s.glu", outRoot); sprintf(okFileName, "%s.ok", outRoot); readAllWords(genoListName, &genoList, &genoListSize, &genoListBuf); readAllWords(cdnaListName, &cdnaList, &cdnaListSize, &cdnaListBuf); pairHash = makePairHash(pairFileName); hitOut = mustOpen(hitFileName, "w"); mergerOut = mustOpen(mergerFileName, "w"); dumpOut = mustOpen("dump.out", "w"); seqListList = needMem(genoListSize*sizeof(seqListList[0]) ); fprintf(hitOut, "Pattern space 0.2 cDNA matcher\n"); fprintf(hitOut, "cDNA files: ", cdnaListSize); for (i=0; i<cdnaListSize; ++i) fprintf(hitOut, " %s", cdnaList[i]); fprintf(hitOut, "\n"); fprintf(hitOut, "%d genomic files\n", genoListSize); for (i=0; i<genoListSize; ++i) { genoName = genoList[i]; if (!startsWith("//", genoName) ) { seqListList[i] = seq = faReadAllDna(genoName); fprintf(hitOut, "%d els in %s ", slCount(seq), genoList[i]); for (; seq != NULL; seq = seq->next) fprintf(hitOut, "%d ", seq->size); 
fprintf(hitOut, "\n"); } } patSpace = makePatSpace(seqListList, genoListSize, oocFileName); for (i=0; i<cdnaListSize; ++i) { FILE *f; char *estFileName; DNA *dna; char *estName; int size; int c; int maxSizeForFuzzyFind = 20000; int dotCount = 0; estFileName = cdnaList[i]; if (startsWith("//", estFileName) ) continue; f = mustOpen(estFileName, "rb"); while ((c = fgetc(f)) != EOF) if (c == '>') break; printf("%s", cdnaList[i]); fflush(stdout); while (fastFaReadNext(f, &dna, &size, &estName)) { aliSeqName = estName; if (size < maxSizeForFuzzyFind) /* Some day need to fix this somehow... */ { struct hashEl *hel; struct cdnaAliList *calList = NULL; hel = hashLookup(pairHash, estName); if (hel != NULL) /* Do pair processing. */ { struct estPair *ep; struct seq *thisSeq, *otherSeq; ep = hel->val; if (hel->name == ep->name3) { thisSeq = &ep->seq3; otherSeq = &ep->seq5; } else { thisSeq = &ep->seq5; otherSeq = &ep->seq3; } if (otherSeq->dna == NULL) /* First in pair - need to save sequence. */ { thisSeq->size = size; thisSeq->dna = needMem(size); memcpy(thisSeq->dna, dna, size); } else /* Second in pair - do gluing and free partner. 
*/ { char mergedName[64]; thisSeq->dna = dna; thisSeq->size = size; sprintf(mergedName, "%s_AND_%s", ep->name5, ep->name3); patSpaceFindOne(patSpace, ep->seq5.dna, ep->seq5.size, '+', '5', ep->name5, &calList); reverseComplement(ep->seq5.dna, ep->seq5.size); patSpaceFindOne(patSpace, ep->seq5.dna, ep->seq5.size, '-', '5', ep->name5, &calList); patSpaceFindOne(patSpace, ep->seq3.dna, ep->seq3.size, '+', '3', ep->name3, &calList); reverseComplement(ep->seq3.dna, ep->seq3.size); patSpaceFindOne(patSpace, ep->seq3.dna, ep->seq3.size, '-', '3', ep->name3, &calList); slReverse(&calList); writeMergers(calList, mergedName, genoList); freez(&otherSeq->dna); thisSeq->dna = NULL; thisSeq->size =otherSeq->size = 0; } } else { patSpaceFindOne(patSpace, dna, size, '+', '5', estName, &calList); reverseComplement(dna, size); patSpaceFindOne(patSpace, dna, size, '-', '5', estName, &calList); slReverse(&calList); writeMergers(calList, estName, genoList); } ++estIx; if ((estIx & 0xfff) == 0) { printf("."); ++dotCount; fflush(stdout); } } } printf("\n"); } aliSeqName = ""; printf("ffSubmitted %3d ffAccepted %3d ffOkScore %3d ffSolidMatch %2d\n", ffSubmitted, ffAccepted, ffOkScore, ffSolidMatch); endTime = clock1000(); printf("Total time is %4.2f\n", 0.001*(endTime-startTime)); /* Write out file who's presense say's we succeeded */ { FILE *f = mustOpen(okFileName, "w"); fputs("ok", f); fclose(f); } if (dumpMe) { htmEnd(bigHtmlFile); htmEnd(littleHtmlFile); } return 0; }
int main(int argc, char *argv[]) { char *genoListName; char *cdnaListName; char *oocFileName; char *hitFileName; char *mergerFileName; struct patSpace *patSpace; long startTime, endTime; char **genoList; int genoListSize; char *genoListBuf; char **cdnaList; int cdnaListSize; char *cdnaListBuf; char *genoName; int i; int estIx = 0; struct dnaSeq **seqListList = NULL, *seq; if (dumpMe) { bigHtmlFile = mustOpen("C:\\inetpub\\wwwroot\\test\\patAli.html", "w"); littleHtmlFile = mustOpen("C:\\inetpub\\wwwroot\\test\\patSpace.html", "w"); htmStart(bigHtmlFile, "PatSpace Alignments"); htmStart(littleHtmlFile, "PatSpace Index"); } if (argc != 6) usage(); startTime = clock1000(); dnaUtilOpen(); makePolys(); genoListName = argv[1]; cdnaListName = argv[2]; oocFileName = argv[3]; hitFileName = argv[4]; mergerFileName = argv[5]; readAllWords(genoListName, &genoList, &genoListSize, &genoListBuf); readAllWords(cdnaListName, &cdnaList, &cdnaListSize, &cdnaListBuf); hitOut = mustOpen(hitFileName, "w"); mergerOut = mustOpen(mergerFileName, "w"); dumpOut = mustOpen("dump.out", "w"); seqListList = needMem(genoListSize*sizeof(seqListList[0]) ); fprintf(hitOut, "Pattern space 0.2 cDNA matcher\n"); fprintf(hitOut, "cDNA files: ", cdnaListSize); for (i=0; i<cdnaListSize; ++i) fprintf(hitOut, " %s", cdnaList[i]); fprintf(hitOut, "\n"); fprintf(hitOut, "%d genomic files\n", genoListSize); for (i=0; i<genoListSize; ++i) { genoName = genoList[i]; if (!startsWith("//", genoName) ) { seqListList[i] = seq = faReadAllDna(genoName); fprintf(hitOut, "%d els in %s ", slCount(seq), genoList[i]); for (; seq != NULL; seq = seq->next) fprintf(hitOut, "%d ", seq->size); fprintf(hitOut, "\n"); } } patSpace = makePatSpace(seqListList, genoListSize, oocFileName); for (i=0; i<cdnaListSize; ++i) { FILE *f; char *estFileName; DNA *dna; char *estName; int size; int c; int maxSizeForFuzzyFind = 20000; int dotCount = 0; estFileName = cdnaList[i]; if (startsWith("//", estFileName) ) continue; f = 
mustOpen(estFileName, "rb"); while ((c = fgetc(f)) != EOF) if (c == '>') break; printf("%s", cdnaList[i]); fflush(stdout); while (fastFaReadNext(f, &dna, &size, &estName)) { if (size < maxSizeForFuzzyFind) /* Some day need to fix this somehow... */ { struct cdnaAliList *calList = NULL; patSpaceFindOne(patSpace, dna, size, '+', estName, estIx, &calList); reverseComplement(dna, size); patSpaceFindOne(patSpace, dna, size, '-', estName, estIx, &calList); slReverse(&calList); writeMergers(calList, estName, size, genoList); ++estIx; if ((estIx & 0xfff) == 0) { printf("."); ++dotCount; fflush(stdout); } } } printf("\n"); } printf("raw %4d ffSubmitted %3d ffAccepted %3d ffOkScore %3d ffSolidMatch %2d\n", grandTotalHits, ffSubmitted, ffAccepted, ffOkScore, ffSolidMatch); endTime = clock1000(); printf("Total time is %4.2f\n", 0.001*(endTime-startTime)); if (dumpMe) { htmEnd(bigHtmlFile); htmEnd(littleHtmlFile); } return 0; }
void writeTableHtml(struct sqlConnection *conn, struct eapGraph *eg, struct fullExperiment *expList, char *assembly, char *outFile) /* Write out a table in simple html based on eeList*/ { char *lightGreen = "#D8FFD8"; char *brightWhite = "#FFFFFF"; FILE *f = mustOpen(outFile, "w"); htmStart(f, "DNase-seq experiment table"); fprintf(f, "<TABLE>\n"); fprintf(f, "<TR>\n"); fprintf(f, "<TH></TH>"); fprintf(f, "<TH>promo</TH>"); fprintf(f, "<TH>promo</TH>"); fprintf(f, "<TH>open</TH>"); fprintf(f, "<TH>open</TH>"); fprintf(f, "<TH>cross</TH>"); fprintf(f, "<TH> </TH>"); fprintf(f, "<TH></TH>"); fprintf(f, "<TH></TH>"); fprintf(f, "<TH></TH>"); fprintf(f, "<TH></TH>"); fprintf(f, "<TH></TH>"); fprintf(f, "<TH></TH>"); fprintf(f, "<TH></TH>"); fprintf(f, "</TR>\n"); fprintf(f, "<TR>\n"); fprintf(f, "<TH>Biosample</TH>"); fprintf(f, "<TH>enrich</TH>"); fprintf(f, "<TH>cover</TH>"); fprintf(f, "<TH>enrich</TH>"); fprintf(f, "<TH>cover</TH>"); fprintf(f, "<TH>enrich</TH>"); fprintf(f, "<TH> </TH>"); fprintf(f, "<TH BGCOLOR='%s'>reads(1)</TH>", lightGreen); fprintf(f, "<TH BGCOLOR='%s'>map(1)</TH>", lightGreen); fprintf(f, "<TH BGCOLOR='%s'>umap(1)</TH>", lightGreen); fprintf(f, "<TH BGCOLOR='%s'>U4M(1)</TH>", lightGreen); fprintf(f, "<TH BGCOLOR='%s'>Spot(1)</TH>", lightGreen); fprintf(f, "<TH BGCOLOR='%s'>SpotX(1)</TH>", lightGreen); fprintf(f, "<TH BGCOLOR='%s'>NSC(1)</TH>", lightGreen); fprintf(f, "<TH BGCOLOR='%s'>RSC(1)</TH>", lightGreen); fprintf(f, "<TH> </TH>"); fprintf(f, "<TH>reads(2)</TH>"); fprintf(f, "<TH>map(2)</TH>"); fprintf(f, "<TH>umap(2)</TH>"); fprintf(f, "<TH>U4M(2)</TH>"); fprintf(f, "<TH>Spot(2)</TH>"); fprintf(f, "<TH>SpotX(2)</TH>"); fprintf(f, "<TH>NSC(2)</TH>"); fprintf(f, "<TH>RSC(2)</TH>"); fprintf(f, "</TR>\n"); struct fullExperiment *exp; for (exp = expList; exp != NULL; exp = exp->next) { struct edwExperiment *ee = exp->exp; fprintf(f, "<TR>\n"); fprintf(f, "<TD>%s</TD>\n", ee->biosample); long long pooledPeakId = pooledBroadPeaksForExp(exp); 
if (pooledPeakId > 0) { struct edwBamFile *bamFileList = bamFilesAncestralToFile(conn, eg, pooledPeakId); long long readCount = 0, mappedCount = 0, uniqueMappedCount = 0; double u4mUniqueRatio = 0; bamListStats(bamFileList, &readCount, &mappedCount, &uniqueMappedCount, &u4mUniqueRatio); printEnrichment(conn, pooledPeakId, "promoter", f); printEnrichment(conn, pooledPeakId, "open", f); } else { fprintf(f, "<TD>n/a</TD>\n"); fprintf(f, "<TD>n/a</TD>\n"); fprintf(f, "<TD>n/a</TD>\n"); fprintf(f, "<TD>n/a</TD>\n"); fprintf(f, "<TD>n/a</TD>\n"); fprintf(f, "<TD>n/a</TD>\n"); fprintf(f, "<TD>n/a</TD>\n"); } uglyOne = (pooledPeakId == 74961); struct edwQaPairSampleOverlap *so = findSampleOverlap(conn, eg, pooledPeakId); if (so != NULL) { fprintf(f, "<TD>%4.2fx</TD>\n", so->sampleSampleEnrichment); } else { uglyf("Trouble finding sample/overlap for file %lld exp %s\n", pooledPeakId, exp->name); fprintf(f, "<TD>n/a</TD>\n"); } fprintf(f, "<TD> </TD>\n"); printOneRep(conn, eg, exp, mustFindReplicateInList(exp->repList, "1"), lightGreen, f); fprintf(f, "<TH> </TH>"); printOneRep(conn, eg, exp, mustFindReplicateInList(exp->repList, "2"), brightWhite, f); fprintf(f, "</TR>\n"); } fprintf(f, "</TABLE>\n"); htmEnd(f); carefulClose(&f); }
int main(int argc, char *argv[]) { char *outName; char xaFileName[512]; char region[64]; FILE *xaFile, *out; struct xaAli *xaList = NULL, *xa; char *sortBy; char *subtitle; int (*cmp)(const void *va, const void *vb); if (argc != 3) { usage(); } sortBy = argv[1]; outName = argv[2]; if (sameWord(sortBy, "score")) { cmp = cmpXaScore; subtitle = "(sorted by alignment score)"; } else if (sameWord(sortBy, "briggsae")) { cmp = cmpXaQuery; subtitle = "(sorted by <I>C. briggsae</I> region)"; } else if (sameWord(sortBy, "elegans")) { cmp = cmpXaTarget; subtitle = "(sorted by <I>C. elegans</I> region)"; } else usage(); /* Read in alignment file. */ sprintf(xaFileName, "%s%s/all%s", wormXenoDir(), "cbriggsae", xaAlignSuffix()); printf("Scanning %s\n", xaFileName); xaFile = xaOpenVerify(xaFileName); while ((xa = xaReadNext(xaFile, FALSE)) != NULL) { xa->milliScore = round(0.001 * xa->milliScore * (xa->tEnd - xa->tStart)); freeMem(xa->qSym); freeMem(xa->tSym); freeMem(xa->hSym); slAddHead(&xaList, xa); } /* Sort by score. */ printf("Sorting..."); slSort(&xaList, cmp); printf(" best score %d\n", xaList->milliScore); /* Write out .html */ printf("Writing %s\n", outName); out = mustOpen(outName, "w"); htmStart(out, "C. briggsae/C. elegans Homologies"); fprintf(out, "<H2>Regions with Sequenced <I>C. briggsae</I> Homologs</H2>\n"); fprintf(out, "<H3>%s</H3>\n", subtitle); fprintf(out, "<TT><PRE><B>"); fprintf(out, "Score <I>C. elegans Region</I> <I>C. briggsae</I> Region </B>\n"); fprintf(out, "--------------------------------------------------------\n"); for (xa = xaList; xa != NULL; xa = xa->next) { fprintf(out, "%6d ", xa->milliScore); sprintf(region, "%s:%d-%d", xa->target, xa->tStart, xa->tEnd); fprintf(out, "<A HREF=\"../cgi-bin/tracks.exe?where=%s\">%21s</A> %s:%d-%d %c", region, region, xa->query, xa->qStart, xa->qEnd, xa->qStrand); fprintf(out, "\n"); } htmEnd(out); return 0; }