void gifLabelVerticalText(char *fileName, char **labels, int labelCount, int height) /* Make a gif file with given labels. This will check to see if fileName * exists already and has not changed, and if so do nothing. */ { struct memGfx *straight = altColorLabels(labels, labelCount, height); struct memGfx *rotated = mgRotate90(straight); struct memGfx *existing = NULL; #ifdef USE_PNG struct tempName tn; makeTempName(&tn, "gifLabelVertTemp", ".png"); mgSavePng(rotated, tn.forCgi, FALSE); rename(tn.forCgi, fileName); #else if (fileExists(fileName)) existing = mgLoadGif(fileName); /* the savings here is in the user's own browser cache - not updated if no change */ if (!sameGifContents(rotated, existing)) { struct tempName tn; makeTempName(&tn, "gifLabelVertTemp", ".gif"); mgSaveGif(rotated, tn.forCgi, FALSE); rename(tn.forCgi, fileName); } #endif mgFree(&straight); mgFree(&rotated); if (existing) mgFree(&existing); }
void chkGlue(char *bacAcc, char *finBac, char *unfinBac, char *gluedBac, int trim, char *repeatMask) /* Display glued and unglued form of BAC. */ { int trackCount = 1; int pixWidth, pixHeight; int x, y; struct memGfx *mg; struct tempName gifTn, mapTn; FILE *mapFile; printf("See picture at bottom for overview of where contigs align.\n\n"); /* Figure out basic dimensions and allocate picture. */ font = mgSmallFont(); trackWidth = 700; trackHeight = mgFontPixelHeight(font) + 4; pixWidth = trackWidth + 2*border; pixHeight = trackCount * (trackHeight+border) + border; x = y = border; mg = mgNew(pixWidth, pixHeight); mgClearPixels(mg); makeBlockColors(mg); /* Create map file. */ makeTempName(&mapTn, "glu", ".map"); mapFile = mustOpen(mapTn.forCgi, "wb"); mapWriteHead(mapFile, pixWidth, pixHeight, bacAcc, trim, repeatMask); /* Write out tracks onto picture. */ aliTrack(bacAcc, finBac, unfinBac, mg, x, y, mapFile, trim, repeatMask); /* Save pic and tell html file about it. */ makeTempName(&gifTn, "glu", ".gif"); mgSaveGif(mg, gifTn.forCgi); printf("<INPUT TYPE=HIDDEN NAME=map VALUE=\"%s\">\n", mapTn.forCgi); printf( "<P><INPUT TYPE=IMAGE SRC = \"%s\" BORDER=1 WIDTH=%d HEIGHT=%d NAME = \"clickMe\" ALIGN=BOTTOM><BR>\n", gifTn.forHtml, pixWidth, pixHeight); printf("Click on contig for detailed alignment\n"); /* Write end of map */ mapWriteBox(mapFile, mtNone, 0, 0, pixWidth, pixHeight, NULL, 0, 0, 0, 0, 0); mapWriteBox(mapFile, mtEnd, 0, 0, pixWidth, pixHeight, NULL, 0, 0, 0, 0, 0); /* Clean up. */ fclose(mapFile); mgFree(&mg); }
// Compute the initial BWTs for the input file split into blocks of records using the BCR algorithm MergeVector computeInitialBCR(const BWTDiskParameters& parameters) { SeqReader* pReader = new SeqReader(parameters.inFile); SeqRecord record; int groupID = 0; size_t numReadTotal = 0; MergeVector mergeVector; MergeItem mergeItem; mergeItem.start_index = 0; // Phase 1: Compute the initial BWTs DNAEncodedStringVector readSequences; bool done = false; while(!done) { done = !pReader->get(record); if(!done) { // the read is valid SeqItem item = record.toSeqItem(); if(parameters.bBuildReverse) item.seq.reverse(); readSequences.push_back(item.seq.toString()); ++numReadTotal; } if(readSequences.size() >= parameters.numReadsPerBatch || (done && readSequences.size() > 0)) { std::string bwt_temp_filename = makeTempName(parameters.outPrefix, groupID, parameters.bwtExtension); std::string sai_temp_filename = makeTempName(parameters.outPrefix, groupID, parameters.saiExtension); BWTCA::runBauerCoxRosone(&readSequences, bwt_temp_filename, sai_temp_filename); // Push the merge info mergeItem.end_index = numReadTotal - 1; // inclusive mergeItem.reads_filename = parameters.inFile; mergeItem.bwt_filename = bwt_temp_filename; mergeItem.sai_filename = sai_temp_filename; mergeVector.push_back(mergeItem); // Start the new group mergeItem.start_index = numReadTotal; ++groupID; readSequences.clear(); } } delete pReader; return mergeVector; }
int main(int argc, char *argv[])
/* Process command line and call passes.
 *   1 a b c    - firstPass(a, b, c)
 *   2 a b      - secondPass(a, b)
 *   3 a b      - thirdPass(a, b)
 *   all a b c  - run the three passes in sequence through two temp files,
 *                which are removed afterwards
 *   view a     - viewWaba(a)
 * Anything else, or a wrong argument count, goes to usage().
 * Returns 0 when control reaches the end of the function. */
{
char *command;

if (argc < 3)
    usage();
command = argv[1];
if (sameWord(command, "1"))
    {
    if (argc != 5)
        usage();
    firstPass(argv[2], argv[3], argv[4]);
    }
else if (sameWord(command, "2"))
    {
    if (argc != 4)
        usage();
    secondPass(argv[2], argv[3]);
    }
else if (sameWord(command, "3"))
    {
    if (argc != 4)
        usage();
    thirdPass(argv[2], argv[3]);
    }
else if (sameWord(command, "all"))
    {
    struct tempName tn1,tn2;
    if (argc != 5)
        usage();
    makeTempName(&tn1, "waba", ".1");
    makeTempName(&tn2, "waba", ".2");
    firstPass(argv[2], argv[3], tn1.forCgi);
    secondPass(tn1.forCgi,tn2.forCgi);
    thirdPass(tn2.forCgi,argv[4]);
    /* The intermediates are only needed to chain the passes together. */
    remove(tn1.forCgi);
    remove(tn2.forCgi);
    }
else if (sameWord(command, "view"))
    {
    if (argc != 3)
        usage();
    viewWaba(argv[2]);
    }
else
    {
    usage();
    }
/* Explicit status rather than falling off the end of a non-void function. */
return 0;
}
void doAdvFilterKeyPasted(struct sqlConnection *conn, struct column *colList, struct column *col) /* Handle submission in key-paste in form. */ { char *pasteVarName = colVarName(col, keyWordPastedPrefix); char *pasteVal = trimSpaces(cartString(cart, pasteVarName)); char *keyVarName = advFilterName(col, "keyFile"); if (pasteVal == NULL || pasteVal[0] == 0) { /* If string is empty then clear cart variable. */ cartRemove(cart, keyVarName); } else { /* Else write variable to temp file and save temp * file name. */ struct tempName tn; FILE *f; makeTempName(&tn, "near", ".key"); f = mustOpen(tn.forCgi, "w"); mustWrite(f, pasteVal, strlen(pasteVal)); carefulClose(&f); cartSetString(cart, keyVarName, tn.forCgi); } cartRemovePrefix(cart, keyWordPastedPrefix); doAdvFilter(conn, colList); }
static void parseCustomTracks(char *db, char *inFile, char *trashFile) /* parse tracks from input file, and also from trashfile if not null */ { char *text; struct customTrack *ctList = NULL, *oldCts = NULL; readInGulp(inFile, &text, NULL); /* read new CT's from input */ ctList = customFactoryParse(db, text, FALSE, NULL); verbose(3, "parsed %d tracks from %s\n", slCount(ctList), inFile); if (trashFile) { /* read old CT's from trash file */ oldCts = customFactoryParse(db, trashFile, TRUE, NULL); /* merge old and new */ ctList = customTrackAddToList(ctList, oldCts, NULL, TRUE); } /* save to new trash file */ static struct tempName tn; makeTempName(&tn, "ctTest", ".bed"); customTracksSaveFile(db, ctList, tn.forCgi); /* reload from new trash file */ ctList = NULL; ctList = customFactoryParse(db, tn.forCgi, TRUE, NULL); customTracksSaveFile(db, ctList, "stdout"); /* cleanup */ unlink(tn.forCgi); }
void saveClonePos(struct clonePos *cloneList, char *database) /* Save sorted clone position list to database. */ { struct sqlConnection *conn = sqlConnect(database); struct clonePos *clone; struct tempName tn; FILE *f; struct dyString *ds = newDyString(2048); /* Create tab file from clone list. */ printf("Creating tab file\n"); makeTempName(&tn, "hgCP", ".tab"); f = mustOpen(tn.forCgi, "w"); for (clone = cloneList; clone != NULL; clone = clone->next) clonePosTabOut(clone, f); fclose(f); /* Create table if it doesn't exist, delete whatever is * already in it, and fill it up from tab file. */ printf("Loading clonePos table\n"); sqlMaybeMakeTable(conn, "clonePos", createClonePos); sqlUpdate(conn, "NOSQLINJ DELETE from clonePos"); sqlDyStringPrintf(ds, "LOAD data local infile '%s' into table clonePos", tn.forCgi); sqlUpdate(conn, ds->string); /* Clean up. */ remove(tn.forCgi); sqlDisconnect(&conn); }
void doMatrixPlot(struct dyString *script, char *skipPSet, char *tableName, char *type, boolean linesOnly)
/* Print out a matrix plot for a particular table.
 * A temp plot file (".pdf" when the "pdf" CGI boolean is set, otherwise
 * ".png") is created blank and linked from the page; the table data for the
 * current altEvent is then fetched and handed to plotMatrixRows(), together
 * with script, to plot into that file. */
{
struct tempName plotTn;
struct dyString *sql = newDyString(256);
struct dMatrix *matrix = NULL;
char heading[256];
char *suffix;

safef(heading, sizeof(heading), "%s - %s", altEvent->geneName, type);

suffix = cgiBoolean("pdf") ? ".pdf" : ".png";
makeTempName(&plotTn, "sp", suffix);
touchBlank(plotTn.forCgi);
makePlotLink(type, plotTn.forCgi);

constructQueryForEvent(sql, skipPSet, tableName);
matrix = dataFromTable(tableName, sql->string);
plotMatrixRows(matrix, script, heading, plotTn.forCgi, type, linesOnly);
}
void motifLogoAndMatrix(struct dnaSeq **seqs, int count, struct dnaMotif *motif) /* Print out motif sequence logo and text (possibly with multiple occurences) */ { // Detect inconsistent motif/pwm tables and suppress confusing display if (motif != NULL) { if (seqs != NULL && motif->columnCount != seqs[0]->size) { warn("Motif seq length doesn't match PWM\n"); return; } } #define MOTIF_HELP_PAGE "../goldenPath/help/hgRegMotifHelp.html" printf("<PRE>\n"); printf("<table>\n"); if (motif != NULL) { struct tempName pngTn; dnaMotifMakeProbabalistic(motif); makeTempName(&pngTn, "logo", ".png"); dnaMotifToLogoPng(motif, 47, 140, NULL, "../trash", pngTn.forCgi); printf("<tr><td></td><td colspan='%d'align=right><a href=\"%s\" target=_blank>Motif display help</a></td></tr>", motif->columnCount, MOTIF_HELP_PAGE); printf("<tr><td></td><td colspan='%d'>", motif->columnCount); printf("<IMG SRC=\"%s\" BORDER=1>", pngTn.forHtml); printf("</td><td></td></tr>\n"); } if (count > 0) { int i; for (i = 0; i < count; i++) { struct dnaSeq *seq = seqs[i]; printf("<tr><td></td>"); touppers(seq->dna); printDnaCells(seq->dna, seq->size); if (count == 1) printf("<td>this occurrence</td></tr>\n"); else // is there a library routine to get 1st, 2nd ...? printf("<td>occurrence #%d</td></tr>\n", i + 1); } } if (motif != NULL) { printf("<tr><td></td>"); printConsensus(motif); printf("<td>motif consensus</td></tr>\n"); dnaMotifPrintProbTable(motif, stdout); } printf("</table>\n"); printf("</PRE>"); }
void invokeR(struct dyString *script)
/* Call R on our script.
 * script->string is written to a temp ".R" file, which is then fed to
 * "R --vanilla" on standard input through system(), with R's stdout and
 * stderr both discarded.  The status returned by system() is not examined
 * and the temp script is left in place. */
{
struct tempName rScript;
FILE *out = NULL;
char command[256];

assert(script);
makeTempName(&rScript, "sp", ".R");
out = mustOpen(rScript.forCgi, "w");
fprintf(out, "%s", script->string);
carefulClose(&out);
/* system() passes the command to sh.  ">& file" is a csh/bash spelling
 * that a strictly POSIX sh does not accept, so use the portable form. */
safef(command, sizeof(command), "R --vanilla < %s > /dev/null 2>&1", rScript.forCgi);
system(command);
}
void gifLabelVerticalText(char *fileName, char **labels, int labelCount, int height)
/* Make an image file with the given labels drawn vertically.
 * The labels are rendered with altColorLabels(), rotated with mgRotate90(),
 * saved as a PNG to a temp file, and the temp file is rename()d to
 * fileName.  The file is rewritten on every call; this version makes no
 * comparison against an existing fileName.
 * NOTE(review): the result of rename() is not checked here. */
{
struct memGfx *straight = altColorLabels(labels, labelCount, height);
struct memGfx *rotated = mgRotate90(straight);
struct tempName tn;

makeTempName(&tn, "gifLabelVertTemp", ".png");
mgSavePng(rotated, tn.forCgi, FALSE);
rename(tn.forCgi, fileName);
mgFree(&straight);
mgFree(&rotated);
}
int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, optionSpecs); saveId = optionExists("saveId"); inputList = optionVal("inputList",inputList); tempDir = optionVal("tempDir",tempDir); if ((argc < 2 && !inputList) || (argc > 1 && inputList)) usage(); if (tempDir[0]!=0 && lastChar(tempDir) != '/') tempDir = addSuffix(tempDir,"/"); if (argc-1 <= MAXFILES && !inputList) { chainMergeSort(argc-1, argv+1, stdout, 0); } else { char *inp0 = addSuffix(tempDir,"inputList0-"); makeTempName(&tempName, inp0, ".tmp"); freez(&inp0); inp0 = cloneString(tempName.forCgi); if (!inputList) { FILE *f = mustOpen(inp0,"w"); int i=0; for (i=1; i<argc; ++i) { fprintf(f, "%s\n", argv[i]); } carefulClose(&f); inputList = inp0; } hierSort(inputList); if (sameString(inputList,inp0)) remove(inp0); freez(&inp0); } return 0; }
char *altGraphXMakeImage(struct altGraphX *ag) /* create a drawing of splicing pattern */ { MgFont *font = mgSmallFont(); int fontHeight = mgFontLineHeight(font); struct spaceSaver *ssList = NULL; struct hash *heightHash = NULL; int rowCount = 0; struct tempName gifTn; int pixWidth = atoi(cartUsualString(cart, "pix", DEFAULT_PIX_WIDTH )); int pixHeight = 0; struct hvGfx *hvg; int lineHeight = 0; double scale = 0; Color shadesOfGray[9]; int maxShade = ArraySize(shadesOfGray)-1; scale = (double)pixWidth/(ag->tEnd - ag->tStart); lineHeight = 2 * fontHeight +1; altGraphXLayout(ag, ag->tStart, ag->tEnd, scale, 100, &ssList, &heightHash, &rowCount); hashFree(&heightHash); pixHeight = rowCount * lineHeight; makeTempName(&gifTn, "hgc", ".png"); hvg = hvGfxOpenPng(pixWidth, pixHeight, gifTn.forCgi, FALSE); makeGrayShades(hvg, maxShade, shadesOfGray); hvGfxSetClip(hvg, 0, 0, pixWidth, pixHeight); altGraphXDrawPack(ag, ssList, hvg, 0, 0, pixWidth, lineHeight, lineHeight-1, ag->tStart, ag->tEnd, scale, font, MG_BLACK, shadesOfGray, "Dummy", NULL); hvGfxUnclip(hvg); hvGfxClose(&hvg); printf( "<IMG SRC = \"%s\" BORDER=1 WIDTH=%d HEIGHT=%d><BR>\n", gifTn.forHtml, pixWidth, pixHeight); return cloneString(gifTn.forHtml); }
static void transRegCodeMotifPrint(struct section *section, struct sqlConnection *conn, char *geneId) /* Print out transcription regulatory code info. */ { struct dnaMotif *motif = transRegMotif(conn, geneId); if (motif != NULL) { struct tempName pngTn; dnaMotifMakeProbabalistic(motif); makeTempName(&pngTn, "logo", ".png"); dnaMotifToLogoPng(motif, 47, 140, NULL, "../trash", pngTn.forCgi); hPrintf(" "); hPrintf("<IMG SRC=\"%s\" BORDER=1>", pngTn.forHtml); hPrintf("\n"); hPrintf("<PRE>"); dnaMotifPrintProb(motif, stdout); hPrintf("</PRE><BR>\n"); hPrintf("This data is from "); hPrintf("<A HREF=\"%s\" TARGET=_blank>", "http://www.nature.com/cgi-taf/DynaPage.taf?file=/nature/journal/v431/n7004/abs/nature02800_fs.html"); hPrintf("Transcriptional regulatory code of a eukaryotic genome</A> "); hPrintf("by Harbison, Gordon, et al."); } }
struct psl* doDnaAlignment(struct dnaSeq *seq, char *db, char *blatHost, char *port, char *nibDir, struct hash *tFileCache) /* get the alignment from the blat host for this sequence */ { struct psl *pslList = NULL; int conn =0; struct tempName pslTn; FILE *f = NULL; struct gfOutput *gvo; if(seq == NULL || db == NULL) errAbort("coordConv::doDnaAlignment() - dnaSeq and/or db can't be NULL."); if(strlen(seq->dna) != seq->size) errAbort("coordConv::doDnaAlignment() - there seems to be something fishy about %s: the size doesn't equal the length", seq->name); /* if there are too many n's it can cause the blat server to hang */ if(strstr(seq->dna, "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn") ) return NULL; makeTempName(&pslTn,"ccR", ".psl"); f = mustOpen(pslTn.forCgi, "w"); gvo = gfOutputPsl(920, FALSE, FALSE, f, FALSE, FALSE); gfOutputHead(gvo, f); /* align to genome, both strands */ conn = gfConnect(blatHost, port); gfAlignStrand(&conn, nibDir, seq, FALSE, 20, tFileCache, gvo); reverseComplement(seq->dna, seq->size); conn = gfConnect(blatHost, port); gfAlignStrand(&conn, nibDir, seq, TRUE, 20 , tFileCache, gvo); gfOutputQuery(gvo, f); carefulClose(&f); pslList = pslLoadAll(pslTn.forCgi); remove(pslTn.forCgi); gfOutputFree(&gvo); return pslList; }
/** Generates the data plot and associated html */ void doCountsPage(struct sageExp *seList, struct sage *sgList) { struct gnuPlot2D *gp = needMem(sizeof(struct gnuPlot2D*)); char *cmd = NULL; double xSize; double ySize; char *title = NULL; char *xTics = NULL; char plotSize[256]; // = cloneString("set size .75,.75\n"); struct tempName pngTn; chuckHtmlStart("Sage Graph"); printf("<center>"); makeTempName(&pngTn, "sageDat", ".png"); gp = createSagePlot(seList, sgList); gp->fileName = pngTn.forCgi; xTics = constructXticsFromExps(seList); title = cloneString("Sage Data for Unigene Clusters"); xSize = 0.020*slCount(gp->gpList[0]); ySize = 0.075*slCount(sgList); if(ySize < 1.25) ySize = 1.25; if(xSize <.75) xSize = .75; sprintf(plotSize, "set size %g, %g\n", xSize,ySize); gp->other = cloneString(plotSize); dynamicStrncat(&gp->other, xTics); gp->ylabel = cloneString("Median Counts"); gp->xlabel = cloneString("Experiment"); gp->title = cloneString(title); gp->xMax = slCount(gp->gpList[0]); gp->yMax = maxDataVal; cmd = gptGenerateCmd(gp); gptPlotFromCmd(cmd); doPlotPrintOut(pngTn.forHtml); htmlEnd(); }
// The algorithm is as follows. We create M BWTs for subsets of // the input reads. These are created independently and written // to disk. They are then merged either sequentially or pairwise // to create the final BWT void buildBWTDisk(const BWTDiskParameters& parameters) { // Build the initial bwts for subsets of the data MergeVector mergeVector; if(parameters.bUseBCR) mergeVector = computeInitialBCR(parameters); else mergeVector = computeInitialSAIS(parameters); // Phase 2: Pairwise merge the BWTs int groupID = mergeVector.size(); // Initial the name of the next intermediate bwt int round = 1; MergeVector nextMergeRound; while(mergeVector.size() > 1) { std::cout << "Starting round " << round << "\n"; SeqReader* pReader = new SeqReader(parameters.inFile); SeqRecord record; for(size_t i = 0; i < mergeVector.size(); i+=2) { if(i + 1 != mergeVector.size()) { std::string bwt_merged_name = makeTempName(parameters.outPrefix, groupID, parameters.bwtExtension); std::string sai_merged_name = makeTempName(parameters.outPrefix, groupID, parameters.saiExtension); MergeItem item1 = mergeVector[i]; MergeItem item2 = mergeVector[i+1]; // Perform the actual merge int64_t curr_idx = merge(pReader, item1, item2, bwt_merged_name, sai_merged_name, parameters.bBuildReverse, parameters.numThreads, parameters.storageLevel); // pReader now points to the end of item1's block of // reads. 
Skip item2's reads assert(curr_idx == item2.start_index); while(curr_idx <= item2.end_index) { bool eof = !pReader->get(record); assert(!eof); (void)eof; ++curr_idx; } // Create the merged mergeItem to use in the next round MergeItem merged; merged.start_index = item1.start_index; merged.end_index = item2.end_index; merged.bwt_filename = bwt_merged_name; merged.sai_filename = sai_merged_name; nextMergeRound.push_back(merged); // Done with the temp files, remove them unlink(item1.bwt_filename.c_str()); unlink(item2.bwt_filename.c_str()); unlink(item1.sai_filename.c_str()); unlink(item2.sai_filename.c_str()); ++groupID; } else { // Singleton, pass through to the next round nextMergeRound.push_back(mergeVector[i]); } } delete pReader; mergeVector.clear(); mergeVector.swap(nextMergeRound); ++round; } assert(mergeVector.size() == 1); // Done, rename the files to their final name std::stringstream bwt_ss; bwt_ss << parameters.outPrefix << parameters.bwtExtension << (USE_GZ ? ".gz" : ""); std::string bwt_final_filename = bwt_ss.str(); rename(mergeVector.front().bwt_filename.c_str(), bwt_final_filename.c_str()); std::stringstream sai_ss; sai_ss << parameters.outPrefix << parameters.saiExtension << (USE_GZ ? ".gz" : ""); std::string sai_final_filename = sai_ss.str(); rename(mergeVector.front().sai_filename.c_str(), sai_final_filename.c_str()); }
struct mimePart *parseMultiParts(struct mimeBuf *b, char *altHeader)
/* This is a recursive function.  It parses multipart MIME messages.
 * Returns a newly allocated mimePart for the part that starts at the
 * current position of b.
 *
 * Multipart parts (content-type starting with "multipart/"): the children
 * are parsed recursively and stored as a list in p->multi, in the order
 * they appear in the input.
 *
 * Other parts: the body is accumulated in memory and stored as a
 * zero-terminated string in p->data, unless it would grow past MAXPARTSIZE,
 * in which case it is written to a temp file whose name is stored in
 * p->fileName.  p->binary is set when a chunk is reported to contain zeros,
 * and p->size holds the total body size.  Exceeding MAXDATASIZE aborts.
 *
 * altHeader is a string of headers that can be fed in if the headers have
 * already been read off the stream by an earlier process, i.e. apache.
 * The special value "autoBoundary" means there is no explicit header: the
 * input is scanned for the first line starting with "--" and a
 * multipart/form-data content-type header is synthesized from it. */
{
struct mimePart *p=AllocA(*p);
char *parentboundary = NULL, *boundary = NULL;
char *ct = NULL;
boolean autoBoundary = FALSE;

//debug
//fprintf(stderr,"altHeader=[%s]\n",altHeader);

if (sameOk(altHeader, "autoBoundary"))
    {
    /* process things with no explicit header.
     * look for *MIME* \n\n-- */
    struct dyString *dy = dyStringNew(0);
    char *prevPrevLine = NULL;
    char *prevLine = NULL;
    char *line = NULL;
    boolean found = FALSE;
    autoBoundary = TRUE;
    while (TRUE)
        {
        if (b->i >= b->eoi && b->eoi < b->eom) /* at end of input */
            break;
        line = getLineMB(b);
        /* Only the leading "--" is required; the stricter checks on the two
         * preceding lines are disabled. */
        if (line && startsWith("--",line)
        //    &&
            //sameString(prevLine,"") &&
            //prevPrevLine &&
            //stringIn("MULTI",prevPrevLine) &&
            //stringIn("MIME",prevPrevLine)
            )
            {
            found = TRUE;
            break;
            }
        /* Slide the two-line history window, freeing the line that drops out. */
        freez(&prevPrevLine);
        prevPrevLine = prevLine;
        prevLine = line;
        if (prevPrevLine)
            touppers(prevPrevLine);
        }
    if (!found)
        errAbort("autoBoundary: No initial boundary found.");
    /* line+2 skips the leading "--" so only the boundary token is used. */
    dyStringPrintf(dy, "CONTENT-TYPE:multipart/form-data; boundary=%s%s%s",
        line+2, getNewLineByType(), getNewLineByType() );
    altHeader = dyStringCannibalize(&dy);

    //debug
    //fprintf(stderr,"autoBoundary altHeader = [%s]\n",altHeader);
    //fflush(stderr);

    freez(&prevPrevLine);
    freez(&prevLine);
    freez(&line);
    }

//debug
//fprintf(stderr,"\n");

readPartHeaderMB(b,p,altHeader);

ct = hashFindVal(p->hdr,"content-type"); /* use lowercase key */
//debug
//fprintf(stderr,"ct from hash:%s\n",ct);
//fflush(stderr);

if (ct && startsWith("multipart/",ct))
    {
    char bound[MAXBOUNDARY];
    char *bnd = NULL;
    struct mimePart *child = NULL;

    /* these 3 vars just for processing epilog chunk: */
    char *bp=NULL;
    int size=0;
    boolean hasZeros=FALSE;

    /* save */
    parentboundary = b->boundary;

    /* NOTE(review): boundary is passed to strlen() without a NULL check;
     * confirm getMimeHeaderFieldVal() cannot return NULL here. */
    boundary = getMimeHeaderFieldVal(ct,"boundary");
    if (strlen(boundary) >= MAXBOUNDARY)
        errAbort("error: boundary= value too long in MIME header Content-type:%s",ct);
    safef(bound, sizeof(bound), "--%s",boundary);  /* do not prepend CRLF to boundary yet */
    freez(&boundary);
    boundary = cloneString(bound);
    //debug
    //fprintf(stderr,"initial boundary parsed:%s\n",boundary);
    //fflush(stderr);

    if (!autoBoundary)
        {
        /* skip any extra "prolog" before the initial boundary marker */
        /* NOTE(review): this loop ends only on a matching line; confirm what
         * getLineMB() returns when the input runs out first. */
        while (TRUE)
            {
            bnd = getLineMB(b);
            if (sameString(bnd,boundary))
                break;
            freez(&bnd);
            }
        //debug
        //fprintf(stderr,"initial boundary found:%s\n",bnd);
        //fflush(stderr);
        freez(&bnd);
        }

    /* include crlf in the boundary so bodies won't have trailing a CRLF
     * this is done here so that in case there's no extra CRLF
     * between the header and the boundary, it will still work,
     * so we only prepend the CRLF to the boundary after initial found */
    safef(bound,sizeof(bound),"%s%s", getNewLineByType(), boundary);
    freez(&boundary);
    boundary=cloneString(bound);

    setBoundaryMB(b, boundary);

    /* One iteration per child part; ends at the closing "--" marker. */
    while(TRUE)
        {
        int i = 0;
        char c1 = ' ', c2 = ' ';
        child = parseMultiParts(b,NULL);
        slAddHead(&p->multi,child);
        //call getLine, compare to boundary
        /* skip extra initial boundary marker - it's moot anyway */
        freez(&bnd);
        //debug
        //fprintf(stderr,"post-parse pre-getLineMB dumpMB: ");
        //dumpMB(b);  //debug
        /* Read exactly as many characters as the boundary has and compare. */
        for (i=0;i<strlen(boundary);++i)
            bound[i] = getcMB(b);
        bound[i] = 0;
        if (!sameString(bound,boundary))
            errAbort("expected boundary %s, but found %s in MIME",boundary,bound);
        //debug
        //fprintf(stderr,"\nfound boundary:%s\n",bound);
        //fflush(stderr);
        c1 = getcMB(b);
        if (c1 == '-')
            {
            c2 = getcMB(b);
            if (c2 == '-')
                break;  /* last boundary found */
            else
                errAbort("expected -- after boundary %s, but found %c%c in MIME",boundary,c1,c2);
            }
        /* Only the DOS newline is two characters long. */
        if (nlType == nlt_dos)
            c2 = getcMB(b);
        /* Each case either breaks out of the switch or calls errAbort();
         * there is no break after the errAbort() calls. */
        switch (nlType)
            {
            case nlt_dos:
                if (c1 == 0x0d && c2 == 0x0a)
                    break;
                else
                    errAbort("expected CRLF after boundary %s, but found %c%c in MIME",boundary,c1,c2);
            case nlt_unix:
                if (c1 == 0x0a)
                    break;
                else
                    errAbort("expected LF after boundary %s, but found %c in MIME",boundary,c1);
            case nlt_mac:
                if (c1 == 0x0d)
                    break;
                else
                    errAbort("expected CR after boundary %s, but found %c in MIME",boundary,c1);
            default:
                errAbort("unexpected nlType %d after boundary %s",nlType,boundary);
            }
        setEopMB(b);
        }
    freez(&bnd);
    /* Children were added at the head, so reverse to restore input order. */
    slReverse(&p->multi);
    /* restore */
    freez(&boundary);
    boundary = parentboundary;
    //debug
    //fprintf(stderr,"restoring parent boundary = %s\n",boundary);
    setBoundaryMB(b, boundary);

    /* dump any "epilog" that may be between the
     * end of the child boundary and the parent boundary */
    getChunkMB(b, &bp, &size, &hasZeros);
    //debug
    //fprintf(stderr,"epilog size=%d\n",size);

    }
else
    {
    char *bp=NULL;
    int size=0;
    boolean hasZeros=FALSE;
    boolean toobig=FALSE;
    boolean asFile=FALSE;
    boolean convert=FALSE;
    FILE *f = NULL;
    struct dyString *dy=newDyString(1024);
    //debug
    //fprintf(stderr,"starting new part (non-multi), dumpMB: \n");
    //dumpMB(b);  //debug

    //debug
    //ct = hashFindVal(p->hdr,"content-transfer-encoding");  /* use lowercase key */
    //fprintf(stderr,"cte from hash:%s\n",ct);

    while(TRUE)
        {
        // break if eop, eod, eoi
        getChunkMB(b, &bp, &size, &hasZeros);
        //debug
        //fprintf(stderr,"bp=%lu size=%d, hasZeros=%d \n",
        //    (unsigned long) bp,
        //    size,
        //    hasZeros);
        if (hasZeros)
            {
            p->binary=TRUE;
            }
        //if (hasZeros && !asFile)
        //    {
        //    convert=TRUE;
        //    }
        /* Switch from memory to a temp file once the part gets too large. */
        if (!asFile && p->size+size > MAXPARTSIZE)
            {
            toobig = TRUE;
            convert=TRUE;
            }
        if (convert)
            {
            struct tempName uploadedData;
            convert=FALSE;
            asFile = TRUE;
            makeTempName(&uploadedData, "hgSs", ".cgi");
            p->fileName=cloneString(uploadedData.forCgi);
            f = mustOpen(p->fileName,"w");
            /* Move what was buffered so far into the file; freeing dy also
             * makes the "if (dy)" test after the loop skip p->data. */
            mustWrite(f,dy->string,dy->stringSize);
            freeDyString(&dy);
            }
        if (asFile)
            {
            mustWrite(f,bp,size);
            }
        else
            {
            dyStringAppendN(dy,bp,size);
            }
        p->size+=size;
        if (p->size > MAXDATASIZE)
            errAbort("max data size allowable for upload in MIME exceeded %llu",(unsigned long long)MAXDATASIZE);

        if (b->eop && b->i == b->eop)  /* end of part */
            {
            break;
            }
        if (b->i == b->eoi && b->eoi < b->eom) /* end of data */
            {
            break;
            }
        moreMimeBuf(b);
        }
    if (dy)
        {
        /* Still in memory: hand the body over as a zero-terminated copy. */
        p->data=needLargeMem(dy->stringSize+1);
        memcpy(p->data,dy->string,dy->stringSize);
        p->data[dy->stringSize] = 0;
        freeDyString(&dy);
        }
    if (f)
        carefulClose(&f);

    //debug
    //fprintf(stderr,"p->fileName=%s p->data=[%s]\n",p->fileName,p->data);

    }

return p;
}
// The algorithm is as follows. We create M BWTs for subsets of // the input reads. These are created independently and written // to disk. They are then merged either sequentially or pairwise // to create the final BWT void buildBWTDisk(const std::string& in_filename, const std::string& out_prefix, const std::string& bwt_extension, const std::string& sai_extension, bool doReverse, int numThreads, int numReadsPerBatch, int storageLevel) { size_t MAX_READS_PER_GROUP = numReadsPerBatch; SeqReader* pReader = new SeqReader(in_filename); SeqRecord record; int groupID = 0; size_t numReadTotal = 0; MergeVector mergeVector; MergeItem mergeItem; mergeItem.start_index = 0; // Phase 1: Compute the initial BWTs ReadTable* pCurrRT = new ReadTable; bool done = false; while(!done) { done = !pReader->get(record); if(!done) { // the read is valid SeqItem item = record.toSeqItem(); if(doReverse) item.seq.reverse(); pCurrRT->addRead(item); ++numReadTotal; } if(pCurrRT->getCount() >= MAX_READS_PER_GROUP || (done && pCurrRT->getCount() > 0)) { // Compute the SA and BWT for this group SuffixArray* pSA = new SuffixArray(pCurrRT, numThreads); // Write the BWT to disk std::string bwt_temp_filename = makeTempName(out_prefix, groupID, bwt_extension); pSA->writeBWT(bwt_temp_filename, pCurrRT); std::string sai_temp_filename = makeTempName(out_prefix, groupID, sai_extension); pSA->writeIndex(sai_temp_filename); // Push the merge info mergeItem.end_index = numReadTotal - 1; // inclusive mergeItem.reads_filename = in_filename; mergeItem.bwt_filename = bwt_temp_filename; mergeItem.sai_filename = sai_temp_filename; mergeVector.push_back(mergeItem); // Cleanup delete pSA; // Start the new group mergeItem.start_index = numReadTotal; ++groupID; pCurrRT->clear(); } } delete pCurrRT; delete pReader; // Phase 2: Pairwise merge the BWTs int round = 1; MergeVector nextMergeRound; while(mergeVector.size() > 1) { std::cout << "Starting round " << round << "\n"; pReader = new SeqReader(in_filename); 
for(size_t i = 0; i < mergeVector.size(); i+=2) { if(i + 1 != mergeVector.size()) { std::string bwt_merged_name = makeTempName(out_prefix, groupID, bwt_extension); std::string sai_merged_name = makeTempName(out_prefix, groupID, sai_extension); MergeItem item1 = mergeVector[i]; MergeItem item2 = mergeVector[i+1]; // Perform the actual merge int64_t curr_idx = merge(pReader, item1, item2, bwt_merged_name, sai_merged_name, doReverse, numThreads, storageLevel); // pReader now points to the end of item1's block of // reads. Skip item2's reads assert(curr_idx == item2.start_index); while(curr_idx <= item2.end_index) { bool eof = !pReader->get(record); assert(!eof); (void)eof; ++curr_idx; } // Create the merged mergeItem to use in the next round MergeItem merged; merged.start_index = item1.start_index; merged.end_index = item2.end_index; merged.bwt_filename = bwt_merged_name; merged.sai_filename = sai_merged_name; nextMergeRound.push_back(merged); // Done with the temp files, remove them unlink(item1.bwt_filename.c_str()); unlink(item2.bwt_filename.c_str()); unlink(item1.sai_filename.c_str()); unlink(item2.sai_filename.c_str()); ++groupID; } else { // Singleton, pass through to the next round nextMergeRound.push_back(mergeVector[i]); } } delete pReader; mergeVector.clear(); mergeVector.swap(nextMergeRound); ++round; } assert(mergeVector.size() == 1); // Done, rename the files to their final name std::stringstream bwt_ss; bwt_ss << out_prefix << bwt_extension << (USE_GZ ? ".gz" : ""); std::string bwt_final_filename = bwt_ss.str(); rename(mergeVector.front().bwt_filename.c_str(), bwt_final_filename.c_str()); std::stringstream sai_ss; sai_ss << out_prefix << sai_extension << (USE_GZ ? ".gz" : ""); std::string sai_final_filename = sai_ss.str(); rename(mergeVector.front().sai_filename.c_str(), sai_final_filename.c_str()); }
void doMiddle(struct cart *theCart) /* Set up globals and make web page */ { /* struct liftOverChain *chainList = NULL, *chain; */ char *userData; /* char *dataFile; */ char *dataFormat; char *organism; char *db; float minBlocks, minMatch; boolean multiple, fudgeThick; int minSizeQ, minSizeT; boolean refreshOnly = FALSE; /* char *err = NULL; */ struct liftOverChain *chainList = NULL, *choice; cart = theCart; if (cgiOptionalString(HGLFT_ERRORHELP_VAR)) { puts("<PRE>"); puts(liftOverErrHelp()); //system("/usr/bin/cal"); puts("</PRE>"); return; } /* Get data to convert - from userData variable, or if * that is empty from a file. */ if (cartOptionalString(cart, "SubmitFile")) userData = cartOptionalString(cart, HGLFT_DATAFILE_VAR); else userData = cartOptionalString(cart, HGLFT_USERDATA_VAR); dataFormat = cartCgiUsualString(cart, HGLFT_DATAFORMAT_VAR, DEFAULT_FORMAT); cartWebStart(cart, NULL, "Lift Genome Annotations"); getDbAndGenome(cart, &db, &organism, oldVars); chainList = liftOverChainListFiltered(); choice = defaultChoices(chainList, db); if (choice == NULL) errAbort("Sorry, no conversions available from this assembly\n"); minSizeQ = cartCgiUsualInt(cart, HGLFT_MINSIZEQ, choice->minSizeQ); minSizeT = cartCgiUsualInt(cart, HGLFT_MINSIZET, choice->minSizeT); minBlocks = cartCgiUsualDouble(cart, HGLFT_MINBLOCKS, choice->minBlocks); minMatch = cartCgiUsualDouble(cart, HGLFT_MINMATCH, choice->minMatch); fudgeThick = cartCgiUsualBoolean(cart, HGLFT_FUDGETHICK, (choice->fudgeThick[0]=='Y') ? TRUE : FALSE); multiple = cartCgiUsualBoolean(cart, HGLFT_MULTIPLE, (choice->multiple[0]=='Y') ? 
TRUE : FALSE); refreshOnly = cartCgiUsualInt(cart, HGLFT_REFRESHONLY_VAR, 0); webMain(choice, dataFormat, multiple); liftOverChainFreeList(&chainList); if (!refreshOnly && userData != NULL && userData[0] != '\0') { struct hash *chainHash = newHash(0); char *chainFile; struct tempName oldTn, mappedTn, unmappedTn; FILE *old, *mapped, *unmapped; char *line; int lineSize; char *fromDb, *toDb; int ct = 0, errCt = 0; /* read in user data and save to file */ makeTempName(&oldTn, HGLFT, ".user"); old = mustOpen(oldTn.forCgi, "w"); fputs(userData, old); fputs("\n", old); /* in case user doesn't end last line */ carefulClose(&old); chmod(oldTn.forCgi, 0666); /* setup output files -- one for converted lines, the other * for lines that could not be mapped */ makeTempName(&mappedTn, HGLFT, ".bed"); makeTempName(&unmappedTn, HGLFT, ".err"); mapped = mustOpen(mappedTn.forCgi, "w"); chmod(mappedTn.forCgi, 0666); unmapped = mustOpen(unmappedTn.forCgi, "w"); chmod(unmappedTn.forCgi, 0666); fromDb = cgiString(HGLFT_FROMDB_VAR); toDb = cgiString(HGLFT_TODB_VAR); chainFile = liftOverChainFile(fromDb, toDb); if (chainFile == NULL) errAbort("ERROR: Can't convert from %s to %s: no chain file loaded", fromDb, toDb); readLiftOverMap(chainFile, chainHash); if (sameString(dataFormat, WIGGLE_FORMAT)) /* TODO: implement Wiggle */ {} else if (sameString(dataFormat, POSITION_FORMAT)) { /* minSizeT here and in liftOverChain.c/h has been renamed minChainT in liftOver.c */ /* ignore multiple, it must be false when position is used */ ct = liftOverPositions(oldTn.forCgi, chainHash, minMatch, minBlocks, 0, minSizeQ, minSizeT, 0, fudgeThick, mapped, unmapped, FALSE, NULL, &errCt); } else if (sameString(dataFormat, BED_FORMAT)) { /* minSizeT here and in liftOverChain.c/h has been renamed minChainT in liftOver.c */ ct = liftOverBed(oldTn.forCgi, chainHash, minMatch, minBlocks, 0, minSizeQ, minSizeT, 0, fudgeThick, mapped, unmapped, multiple, NULL, &errCt); } else /* programming error */ errAbort("ERROR: 
Unsupported data format: %s\n", dataFormat); webNewSection("Results"); if (ct) { /* some records succesfully converted */ cgiParagraph(""); printf("Successfully converted %d record", ct); printf("%s: ", ct > 1 ? "s" : ""); printf("<A HREF=%s TARGET=_blank>View Conversions</A>\n", mappedTn.forCgi); } if (errCt) { /* some records not converted */ cgiParagraph(""); printf("Conversion failed on %d record", errCt); printf("%s. ", errCt > 1 ? "s" : ""); printf("<A HREF=%s TARGET=_blank>Display failure file</A> \n", unmappedTn.forCgi); printf("<A HREF=\"../cgi-bin/hgLiftOver?%s=1\" TARGET=_blank>Explain failure messages</A>\n", HGLFT_ERRORHELP_VAR); puts("<P>Failed input regions:\n"); struct lineFile *errFile = lineFileOpen(unmappedTn.forCgi, TRUE); puts("<BLOCKQUOTE><PRE>\n"); while (lineFileNext(errFile, &line, &lineSize)) puts(line); lineFileClose(&errFile); puts("</PRE></BLOCKQUOTE>\n"); } if (sameString(dataFormat, POSITION_FORMAT) && multiple) { puts("<BLOCKQUOTE><PRE>\n"); puts("Note: multiple checkbox ignored since it is not supported for position format."); puts("</PRE></BLOCKQUOTE>\n"); } carefulClose(&unmapped); } webDataFormats(); webDownloads(); cartWebEnd(); }
// Compute the initial BWTs for the input file split into blocks of records using the SAIS algorithm MergeVector computeInitialSAIS(const BWTDiskParameters& parameters) { SeqReader* pReader = new SeqReader(parameters.inFile); SeqRecord record; int groupID = 0; size_t numReadTotal = 0; MergeVector mergeVector; MergeItem mergeItem; mergeItem.start_index = 0; // Phase 1: Compute the initial BWTs ReadTable* pCurrRT = new ReadTable; bool done = false; while(!done) { done = !pReader->get(record); if(!done) { // the read is valid SeqItem item = record.toSeqItem(); if(parameters.bBuildReverse) item.seq.reverse(); pCurrRT->addRead(item); ++numReadTotal; } if(pCurrRT->getCount() >= parameters.numReadsPerBatch || (done && pCurrRT->getCount() > 0)) { // Compute the SA and BWT for this group SuffixArray* pSA = new SuffixArray(pCurrRT, 1); // Write the BWT to disk std::string bwt_temp_filename = makeTempName(parameters.outPrefix, groupID, parameters.bwtExtension); pSA->writeBWT(bwt_temp_filename, pCurrRT); std::string sai_temp_filename = makeTempName(parameters.outPrefix, groupID, parameters.saiExtension); pSA->writeIndex(sai_temp_filename); // Push the merge info mergeItem.end_index = numReadTotal - 1; // inclusive mergeItem.reads_filename = parameters.inFile; mergeItem.bwt_filename = bwt_temp_filename; mergeItem.sai_filename = sai_temp_filename; mergeVector.push_back(mergeItem); // Cleanup delete pSA; // Start the new group mergeItem.start_index = numReadTotal; ++groupID; pCurrRT->clear(); } } delete pCurrRT; delete pReader; return mergeVector; }
void hierSort(char *inputList) /* Do a hierarchical merge sort so we don't run out of system file handles */ { int level = 0; char thisName[256]; char nextName[256]; char sortName[256]; struct lineFile *thisLf = NULL; FILE *nextF = NULL; int sortCount = 0; FILE *sortF = NULL; int fileCount = 0; char *files[MAXFILES]; boolean more = FALSE; int block=0; char *line=NULL; safef(nextName, sizeof(nextName), "%s", inputList); do { block=0; safef(thisName, sizeof(thisName), "%s", nextName); safef(nextName, sizeof(nextName), "%sinputList%d-", tempDir, level+1); makeTempName(&tempName, nextName, ".tmp"); safef(nextName, sizeof(nextName), "%s", tempName.forCgi); thisLf = lineFileOpen(thisName,TRUE); if (!thisLf) errAbort("error lineFileOpen(%s) returned NULL\n",thisName); more = lineFileNext(thisLf, &line, NULL); while (more) { int i=0; fileCount = 0; while (more && fileCount < MAXFILES) { files[fileCount++]=cloneString(line); more = lineFileNext(thisLf, &line, NULL); } if (!more && block==0) { /* last level */ sortF = stdout; } else { if (!nextF) nextF = mustOpen(nextName,"w"); safef(sortName, sizeof(sortName), "%ssort%d-", tempDir, sortCount++); makeTempName(&tempName, sortName, ".tmp"); safef(sortName, sizeof(sortName), "%s", tempName.forCgi); fprintf(nextF, "%s\n", sortName); sortF = mustOpen(sortName,"w"); } chainMergeSort(fileCount, files, sortF, level); if (sortF != stdout) carefulClose(&sortF); for(i=0;i<fileCount;++i) freez(&files[i]); verboseDot(); verbose(2,"block=%d\n",block); ++block; } lineFileClose(&thisLf); if (nextF) carefulClose(&nextF); if (level > 0) { remove(thisName); } verbose(1,"\n"); verbose(2,"level=%d, block=%d\n",level,block); ++level; } while (block > 1); }
void doRegressionPlot(struct dyString *script, char *skipPset, char *incTable, char *skipTable, char *geneTable) /* Put up a regression plot. */ { struct dMatrix *skip = NULL, *inc = NULL, *gene = NULL; char query[256]; int i = 0; struct tempName regPlot; double thresh = .75; if(cgiBoolean("allPoints")) thresh = -1; if(cgiBoolean("pdf")) makeTempName(®Plot, "sp", ".pdf"); else makeTempName(®Plot, "sp", ".png"); touchBlank(regPlot.forCgi); sqlSafef(query, sizeof(query), "select * from %s where name like '%%%s%%';", incTable, skipPset); inc = dataFromTable(incTable, query); sqlSafef(query, sizeof(query), "select * from %s where name like '%%%s%%';", skipTable, skipPset); skip = dataFromTable(skipTable, query); sqlSafef(query, sizeof(query), "select * from %s where name like '%%%s%%';", geneTable, skipPset); gene = dataFromTable(geneTable, query); initRegPlotOutput(script, regPlot.forCgi); dyStringPrintf(script, "incDat = c("); for(i = 0; i< inc->colCount; i++) dyStringPrintf(script, "%.4f,", inc->matrix[0][i]); dyStringPrintf(script, ");\n"); dyStringPrintf(script, "skipDat = c("); for(i = 0; i< skip->colCount; i++) dyStringPrintf(script, "%.4f,", skip->matrix[0][i]); dyStringPrintf(script, ");\n"); dyStringPrintf(script, "geneDat = c("); for(i = 0; i< gene->colCount; i++) dyStringPrintf(script, "%.4f,", gene->matrix[0][i]); dyStringPrintf(script, ");\n"); dyStringPrintf(script, "expressed = geneDat > %.4f;\n", thresh); dyStringPrintf(script, abbv); if(cgiBoolean("muscle")) { dyStringPrintf(script, isMuscle); dyStringPrintf(script, plotRegression, altEvent->geneName, "Muscle", "Muscle"); } else { dyStringPrintf(script, isBrain); dyStringPrintf(script, plotRegression, altEvent->geneName, "Brain", "Brain"); } closePlotOutput(script); makePlotLink("Include vs. 
Skip", regPlot.forCgi); dyStringPrintf(html, "<table width=600><tr><td>\n"); dyStringPrintf(html, "Tissues for which the gene expression was too low to be considered are in lighter colors.<br><br>\n"); dyStringPrintf(html, "%s\n", key); dyStringPrintf(html, "</td></tr></table><br>\n"); //dyStringPrintf(html, "<img src='%s'><br>\n", regPlot.forCgi); }
void makePlot(struct clonePos *xList, struct clonePos *yList, struct hash *yHash) /* Write out graphics for plot. */ { struct memGfx *mg = NULL; struct tempName gifTn; char *mapName = "map"; struct clonePos *xp, *yp; int i, j, x, y, nextX, nextY; int divisions = 10; double invZoom = 1.0/zoom; double magnify = 2.0; double newZoom = zoom*magnify; double invNewZoom = 1.0/newZoom; int xCount = slCount(xList); plotName = (xCount/zoom < 50); if (xList == NULL || yList == NULL) return; font = mgSmallFont(); posSpan(xList, &xStart, &xEnd); posSpan(yList, &yStart, &yEnd); if (pix < 50 || pix > 5000) errAbort("Pixels out of range - must be between 50 an 5000"); mg = mgNew(pix, pix); mgClearPixels(mg); /* Plot dots. */ for (xp = xList; xp != NULL; xp = xp->next) { if ((yp = hashFindVal(yHash, xp->name)) != NULL) { zoomScale(xp->pos, yp->pos, &x, &y); plot(mg, x, y, xp->name, MG_BLACK); } } /* Make zooming image map. */ printf("<MAP Name=%s>\n", mapName); for (i=0; i<divisions; ++i) { double cenX = xOff + (i + 0.5) * (invZoom / divisions); double sx = cenX - invNewZoom/2; x = i*pix/divisions; nextX = (i+1)*pix/divisions; for (j=0; j<divisions; ++j) { double cenY = yOff + (j + 0.5) * (invZoom / divisions); double sy = cenY - invNewZoom/2; y = j*pix/divisions; nextY = (j+1)*pix/divisions; mapZoomIn(x, y, nextX - x, nextY - y, sx, sy, zoom*magnify); } } printf("</MAP>\n"); /* Save image in temp dir. */ makeTempName(&gifTn, "wikPic", ".gif"); mgSaveGif(mg, gifTn.forCgi, FALSE); printf( "<P><IMG SRC = \"%s\" BORDER=1 WIDTH=%d HEIGHT=%d USEMAP=#%s><BR>\n", gifTn.forHtml, pix, pix, mapName); mgFree(&mg); /* Print some extra info. */ printf("X has %d elements ranging from %d to %d<BR>\n", slCount(xList), xStart, xEnd); printf("Y has %d elements ranging from %d to %d<BR>\n", slCount(yList), yStart, yEnd); }