void hgGoldGapGl(char *database, char *gsDir, char *ooSubDir, boolean doGl, char *oneChrom)
/* hgGoldGapGl - Put chromosome .agp and .gl files into browser database.
 *
 * database - database to connect to (no connection is made when noLoad is set).
 * gsDir    - root directory; gsDir/ffa/sequence.inf is read when doGl is TRUE.
 * ooSubDir - subdirectory of gsDir that is scanned for chromosome directories.
 * doGl     - when TRUE, makeGl() is also run on each chromosome directory.
 * oneChrom - if non-NULL, only the directory whose name matches it is processed.
 *
 * Aborts when a constructed path does not fit its buffer, or when no
 * chromosome directory was processed. */
{
    struct fileInfo *chrFiList, *chrFi;
    struct sqlConnection *conn = NULL;
    char ooDir[512];
    char pathName[512];
    struct hash *cloneVerHash = newHash(0);
    boolean gotAny = FALSE;
    struct hash *chromDirHash = newHash(4);
    char *chromLst = optionVal("chromLst", NULL);
    int pathLen;

    if (! noLoad)
        conn = sqlConnect(database);

    verbose(2,"#\tcomplete gold, gap and .gl files produced\n");

    /* Optional list of extra directory names that count as chromosomes. */
    if (chromLst != NULL) {
        struct lineFile *clf = lineFileOpen(chromLst, TRUE);
        char *row[1];
        while (lineFileRow(clf, row))
            hashAdd(chromDirHash, row[0], NULL);
        lineFileClose(&clf);
    }

    /* Bounded formatting: an over-long path is an error, not an overflow. */
    pathLen = snprintf(ooDir, sizeof(ooDir), "%s/%s", gsDir, ooSubDir);
    if (pathLen < 0 || (size_t)pathLen >= sizeof(ooDir))
        errAbort("Path too long: %s/%s", gsDir, ooSubDir);

    /* target prefix is used in zoo browser */
    /* NOTE(review): both prefixes skip 3 characters, which matches the length
     * of "chr" but not of "target" - confirm this is intended. */
    if (oneChrom != NULL && (startsWith("chr", oneChrom) || startsWith("target", oneChrom)))
        oneChrom += 3;

    if (doGl) {
        pathLen = snprintf(pathName, sizeof(pathName), "%s/ffa/sequence.inf", gsDir);
        if (pathLen < 0 || (size_t)pathLen >= sizeof(pathName))
            errAbort("Path too long: %s/ffa/sequence.inf", gsDir);
        makeCloneVerHash(pathName, cloneVerHash);
    }

    chrFiList = listDirX(ooDir, "*", FALSE);
    for (chrFi = chrFiList; chrFi != NULL; chrFi = chrFi->next) {
        /* A chromosome directory is a directory with a name of at most two
         * characters, an NA_ prefix, or an entry in the chromLst file. */
        if (chrFi->isDir &&
            ((strlen(chrFi->name) <= 2) || startsWith("NA_", chrFi->name) ||
             (NULL != hashLookup(chromDirHash, chrFi->name)))) {
            if (oneChrom == NULL || sameWord(chrFi->name, oneChrom)) {
                pathLen = snprintf(pathName, sizeof(pathName), "%s/%s", ooDir, chrFi->name);
                if (pathLen < 0 || (size_t)pathLen >= sizeof(pathName))
                    errAbort("Path too long: %s/%s", ooDir, chrFi->name);
                makeGoldAndGap(conn, pathName);
                if (doGl)
                    makeGl(conn, pathName, cloneVerHash);
                gotAny = TRUE;
                uglyf("done %s\n", chrFi->name);
            }
        }
    }

    slFreeList(&chrFiList);
    if (! noLoad)
        sqlDisconnect(&conn);
    hashFree(&chromDirHash);
    hashFree(&cloneVerHash);
    if (!gotAny)
        errAbort("No contig agp and gold files found");
}
void knownToVisiGene(char *database) /* knownToVisiGene - Create knownToVisiGene table by riffling through various other knownTo tables. */ { char *tempDir = "."; FILE *f = hgCreateTabFile(tempDir, outTable); struct sqlConnection *hConn = sqlConnect(database); struct sqlConnection *iConn = sqlConnect(visiDb); struct sqlResult *sr; char **row; struct hash *geneImageHash = newHash(18); struct hash *locusLinkImageHash = newHash(18); struct hash *refSeqImageHash = newHash(18); struct hash *genbankImageHash = newHash(18); struct hash *probeImageHash = newHash(18); struct hash *knownToLocusLinkHash = newHash(18); struct hash *knownToRefSeqHash = newHash(18); struct hash *knownToGeneHash = newHash(18); struct hash *favorHugoHash = newHash(18); struct hash *knownToProbeHash = newHash(18); struct hash *knownToAllProbeHash = newHash(18); struct genePred *knownList = NULL, *known; struct hash *dupeHash = newHash(17); probesDb = optionVal("probesDb", database); struct sqlConnection *probesConn = sqlConnect(probesDb); vgProbes = sqlTableExists(probesConn,"vgProbes"); vgAllProbes = sqlTableExists(probesConn,"vgAllProbes"); /* Go through and make up hashes of images keyed by various fields. 
*/ sr = sqlGetResult(iConn, "NOSQLINJ select image.id,imageFile.priority,gene.name,gene.locusLink,gene.refSeq,gene.genbank" ",probe.id,submissionSet.privateUser,vgPrbMap.vgPrb,gene.id" " from image,imageFile,imageProbe,probe,gene,submissionSet,vgPrbMap" " where image.imageFile = imageFile.id" " and image.id = imageProbe.image" " and imageProbe.probe = probe.id" " and probe.gene = gene.id" " and image.submissionSet=submissionSet.id" " and vgPrbMap.probe = probe.id"); while ((row = sqlNextRow(sr)) != NULL) { int id = sqlUnsigned(row[0]); float priority = atof(row[1]); int privateUser = sqlSigned(row[7]); char vgPrb_Id[256]; safef(vgPrb_Id, sizeof(vgPrb_Id), "vgPrb_%s",row[8]); int geneId = sqlUnsigned(row[9]); if (privateUser == 0) { addPrioritizedImage(probeImageHash, id, priority, geneId, vgPrb_Id); addPrioritizedImage(geneImageHash, id, priority, geneId, row[2]); addPrioritizedImage(locusLinkImageHash, id, priority, geneId, row[3]); addPrioritizedImage(refSeqImageHash, id, priority, geneId, row[4]); addPrioritizedImage(genbankImageHash, id, priority, geneId, row[5]); } } verbose(2, "Made hashes of image: geneImageHash %d, locusLinkImageHash %d, refSeqImageHash %d" ", genbankImageHash %d probeImageHash %d\n", geneImageHash->elCount, locusLinkImageHash->elCount, refSeqImageHash->elCount, genbankImageHash->elCount, probeImageHash->elCount); sqlFreeResult(&sr); /* Build up list of known genes. */ sr = sqlGetResult(hConn, "NOSQLINJ select * from knownGene"); while ((row = sqlNextRow(sr)) != NULL) { struct genePred *known = genePredLoad(row); if (!hashLookup(dupeHash, known->name)) { hashAdd(dupeHash, known->name, NULL); slAddHead(&knownList, known); } } slReverse(&knownList); sqlFreeResult(&sr); verbose(2, "Got %d known genes\n", slCount(knownList)); /* Build up hashes from knownGene to other things. 
*/ if (vgProbes) bestProbeOverlap(probesConn, "vgProbes", knownList, knownToProbeHash); if (vgAllProbes) bestProbeOverlap(probesConn, "vgAllProbes", knownList, knownToAllProbeHash); foldIntoHash(hConn, "knownToLocusLink", "name", "value", knownToLocusLinkHash, NULL, FALSE); foldIntoHash(hConn, "knownToRefSeq", "name", "value", knownToRefSeqHash, NULL, FALSE); foldIntoHash(hConn, "kgXref", "kgID", "geneSymbol", knownToGeneHash, favorHugoHash, FALSE); foldIntoHash(hConn, "kgAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE); foldIntoHash(hConn, "kgProtAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE); verbose(2, "knownToLocusLink %d, knownToRefSeq %d, knownToGene %d knownToProbe %d knownToAllProbe %d\n", knownToLocusLinkHash->elCount, knownToRefSeqHash->elCount, knownToGeneHash->elCount, knownToProbeHash->elCount, knownToAllProbeHash->elCount); /* Try and find an image for each gene. */ for (known = knownList; known != NULL; known = known->next) { char *name = known->name; struct prioritizedImage *best = NULL; { best = bestImage(name, knownToLocusLinkHash, locusLinkImageHash); if (!best) best = bestImage(name, knownToRefSeqHash, refSeqImageHash); if (!best) { best = hashFindVal(genbankImageHash, name); } if (!best) best = bestImage(name, knownToGeneHash, geneImageHash); if (vgProbes && !best) best = bestImage(name, knownToProbeHash, probeImageHash); if (vgAllProbes && !best) best = bestImage(name, knownToAllProbeHash, probeImageHash); } if (best) { fprintf(f, "%s\t%d\t%d\n", name, best->imageId, best->geneId); } } createTable(hConn, outTable); hgLoadTabFile(hConn, tempDir, outTable, &f); hgRemoveTabFile(tempDir, outTable); }
int main(int argc, char *argv[]) { struct sqlConnection *conn, *conn3; char query[256], query3[256]; struct sqlResult *sr, *sr3; char **row, **row3; FILE *o1, *o2; char *locusID; /* LocusLink ID */ char *kgTempDbName, *roDbName; char cond_str[200]; char *kgId; char *mapID; char *desc; char *mRNA; optionInit(&argc, argv, options); if (argc != 3) usage(); kgTempDbName = argv[1]; roDbName = argv[2]; conn = hAllocConn(roDbName); conn3= hAllocConn(roDbName); o1 = fopen("j.dat", "w"); o2 = fopen("jj.dat", "w"); table = optionVal("table", "knownGene"); sqlSafef(query, sizeof(query), "select name from %s.%s", roDbName, table); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); while (row != NULL) { kgId = row[0]; sqlSafefFrag(cond_str, sizeof(cond_str), "kgId='%s'", kgId); mRNA = sqlGetField(roDbName, "kgXref", "mRNA", cond_str); sqlSafefFrag(cond_str, sizeof(cond_str), "mrna='%s'", mRNA); locusID = sqlGetField("entrez", "entrezMrna", "geneId", cond_str); /* look for RefSeq if not found in mRNAs */ if (locusID == NULL) { sqlSafefFrag(cond_str, sizeof(cond_str), "refseq='%s'", mRNA); locusID = sqlGetField("entrez", "entrezRefseq", "geneId", cond_str); } if (locusID != NULL) { sqlSafef(query3, sizeof(query3), "select * from %s.keggList where locusID = '%s'", kgTempDbName, locusID); sr3 = sqlGetResult(conn3, query3); while ((row3 = sqlNextRow(sr3)) != NULL) { mapID = row3[1]; desc = row3[2]; fprintf(o1, "%s\t%s\t%s\n", kgId, locusID, mapID); fprintf(o2, "%s\t%s\n", mapID, desc); row3 = sqlNextRow(sr3); } sqlFreeResult(&sr3); } else { /* printf("%s not found in Entrez.\n", kgId);fflush(stdout);*/ if (differentString(table, "knownGene")) { sqlSafefFrag(cond_str, sizeof(cond_str), "name='%s'", kgId); locusID = sqlGetField(roDbName, table, "name2", cond_str); sqlSafef(query3, sizeof(query3), "select * from %s.keggList where locusID = '%s'", kgTempDbName, kgId); sr3 = sqlGetResult(conn3, query3); while ((row3 = sqlNextRow(sr3)) != NULL) { mapID = row3[1]; desc = row3[2]; 
fprintf(o1, "%s\t%s\t%s\n", kgId, locusID, mapID); fprintf(o2, "%s\t%s\n", mapID, desc); row3 = sqlNextRow(sr3); } sqlFreeResult(&sr3); } } row = sqlNextRow(sr); } fclose(o1); fclose(o2); hFreeConn(&conn); mustSystem("cat j.dat|sort|uniq >keggPathway.tab"); mustSystem("cat jj.dat|sort|uniq >keggMapDesc.tab"); mustSystem("rm j.dat"); mustSystem("rm jj.dat"); return(0); }
void altSummary(char *db, char *agxFileName, char *summaryOutName, char *htmlOutName, char *htmlFramesOutName) /* Look through a bunch of splice sites and output some statistics and links. */ { struct altGraphX *agList = NULL, *ag = NULL; struct altSpliceSite *aSpliceList = NULL, *aSplice=NULL; char *RDataName = optionVal("RData", NULL); char *bedName = optionVal("bedName", NULL); FILE *htmlOut = NULL; FILE *htmlFramesOut = NULL; FILE *summaryOut = NULL; int altSpliceSites = 0, altSpliceLoci = 0, totalSpliceSites = 0; warn("Loading splicing graphs."); agList = altGraphXLoadAll(agxFileName); htmlFramesOut = mustOpen(htmlFramesOutName, "w"); htmlOut = mustOpen(htmlOutName, "w"); summaryOut = mustOpen(summaryOutName, "w"); if(RDataName != NULL) { char buff[256]; safef(buff, sizeof(buff), "%s.control", RDataName); RDataCont = mustOpen(buff, "w"); outputRHeader(RDataCont); safef(buff, sizeof(buff), "%s.alt", RDataName); RData = mustOpen(buff, "w"); outputRHeader(RData); } if(bedName != NULL) { openBedFiles(bedName); } writeOutFrames(htmlFramesOut, htmlOutName, db); carefulClose(&htmlFramesOut); warn("Examining splicing graphs."); fprintf(htmlOut, "<html>\n<body bgcolor=\"#FFF9D2\"><b>Alt-Splice List</b>\n" "<table border=1><tr><th>Name (count)</th><th>Type</th><th>Size</th></tr>\n"); for(ag=agList; ag != NULL; ag=ag->next) { lookForAltSplicing(db, ag, &aSpliceList, &altSpliceSites, &altSpliceLoci, &totalSpliceSites); for(aSplice=aSpliceList; aSplice != NULL; aSplice= aSplice->next) { altSpliceSiteOutput(aSplice, summaryOut, '\t', '\n'); htmlLinkOut(db, aSplice, htmlOut); if(bedViewOutFile != NULL) bedViewOut(aSplice, bedViewOutFile); } altSpliceSiteFreeList(&aSpliceList); } warn("\nDone."); fprintf(htmlOut,"</body></html>\n"); warn("%d altSpliced sites in %d alt-spliced loci out of %d total loci.", altSpliceSites, altSpliceLoci, slCount(agList)); printSpliceTypeInfo(altSpliceLoci); altGraphXFreeList(&agList); if(RData != NULL) { carefulClose(&RData); 
carefulClose(&RDataCont); } carefulClose(&htmlOut); carefulClose(&summaryOut); }
void paraNode() /* paraNode - a net server. */ { char *line; char *command; struct sockaddr_in sai; /* We have to know who we are... */ hostName = getMachine(); initRandom(); getTicksToHundreths(); /* log init */ if (optionExists("log")) logOpenFile("paraNode", optionVal("log", NULL)); else logOpenSyslog("paraNode", optionVal("logFacility", NULL)); logSetMinPriority(optionVal("logMinPriority", "info")); logInfo("starting paraNode on %s", hostName); /* Make job lists. */ jobsRunning = newDlList(); jobsFinished = newDlList(); /* Set up socket and self to listen to it. */ ZeroVar(&sai); sai.sin_family = AF_INET; sai.sin_port = htons(paraNodePort); sai.sin_addr.s_addr = INADDR_ANY; mainRudp = rudpMustOpenBound(&sai); mainRudp->maxRetries = 12; /* Event loop. */ findNow(); for (;;) { /* Get next incoming message and optionally check to make * sure that it's from a host we trust, and check signature * on first bit of incoming data. */ if (pmReceive(&pmIn, mainRudp)) { findNow(); if (hubName == NULL || ntohl(pmIn.ipAddress.sin_addr.s_addr) == hubIp || ntohl(pmIn.ipAddress.sin_addr.s_addr) == localIp) { /* Host and signature look ok, read a string and * parse out first word as command. 
*/ line = pmIn.data; logDebug("message from %s: \"%s\"", paraFormatIp(ntohl(pmIn.ipAddress.sin_addr.s_addr)), line); command = nextWord(&line); if (command != NULL) { if (sameString("quit", command)) break; else if (sameString("run", command)) doRun(line, &pmIn.ipAddress); else if (sameString("jobDone", command)) jobDone(line); else if (sameString("status", command)) doStatus(); else if (sameString("kill", command)) doKill(line); else if (sameString("check", command)) doCheck(line, &pmIn.ipAddress); else if (sameString("resurrect", command)) doResurrect(line, &pmIn.ipAddress); else if (sameString("listJobs", command)) listJobs(); else if (sameString("fetch", command)) doFetch(line); else logWarn("invalid command: \"%s\"", command); } logDebug("done command"); } else { logWarn("command from unauthorized host %s", paraFormatIp(ntohl(pmIn.ipAddress.sin_addr.s_addr))); } } } rudpClose(&mainRudp); }
void splitByCount(char *inName, int pieceSize, char *outRoot, off_t estSize, int extra) /* Split up file into pieces pieceSize long. */ { off_t pieces = (estSize + pieceSize-1)/pieceSize; int digits = digitsBaseTen(pieces); int maxN = optionInt("maxN", pieceSize-1); boolean oneFile = optionExists("oneFile"); char fileName[PATH_LEN]; char dirOnly[PATH_LEN], noPath[128]; int pos, pieceIx = 0, writeCount = 0; struct dnaSeq seq; struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = NULL; Bits *bits = NULL; int seqCount = 0; char *outFile = optionVal("out", NULL); char *liftFile = optionVal("lift", NULL); FILE *lift = NULL; ZeroVar(&seq); splitPath(outRoot, dirOnly, noPath, NULL); if (oneFile) { sprintf(fileName, "%s.fa", outRoot); f = mustOpen(fileName, "w"); } if (liftFile) lift = mustOpen(liftFile, "w"); /* Count number of N's from s[0] to s[size-1]. * Treat any parts past end of string as N's. */ while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { bits = bitAlloc(seq.size); setBitsN(seq.dna, seq.size, bits); ++seqCount; if (outFile != NULL) { if (seqCount > 1) errAbort("Can only handle in files with one sequence using out option"); bitsForOut(outFile, seq.size, bits); } for (pos = 0; pos < seq.size; pos += pieceSize) { char numOut[128]; int thisSize = seq.size - pos; if (thisSize > (pieceSize + extra)) thisSize = pieceSize + extra; if ((thisSize <= extra) && (pos > 0)) break; /* nobody wants duplicate smaller than extra overhang */ if (bitCountRange(bits, pos, thisSize) <= maxN) { if (!oneFile) { mkOutPath(fileName, outRoot, digits, pieceIx); f = mustOpen(fileName, "w"); } sprintf(numOut, "%s%0*d", noPath, digits, pieceIx); faWriteNext(f, numOut, seq.dna + pos, thisSize); if (lift) fprintf(lift, "%d\t%s\t%d\t%s\t%d\n", pos, numOut, thisSize, seq.name, seq.size); ++writeCount; if (!oneFile) carefulClose(&f); } pieceIx++; } bitFree(&bits); } carefulClose(&f); carefulClose(&lift); lineFileClose(&lf); printf("%d pieces of %d written\n", writeCount, 
pieceIx); }
int main(int argc, char *argv[]) { struct lineFile *sif, *dsf, *daf, *gbf; FILE *of, *opf, *oaf, *off, *asf, *dff; char filename[256], *gbName; int verb = 0; verboseSetLevel(0); optionInit(&argc, argv, optionSpecs); if (argc < 3) { fprintf(stderr, "USAGE: updateStsInfo [-verbose=<level> -gb=<file>] <stsInfo file> <all.STS.fa> <dbSTS.sts> <dbSTS.aliases> <dbSTS.convert.fa> <outfile prefix>\n"); return 1; } verb = optionInt("verbose", 0); verboseSetLevel(verb); gbName = optionVal("gb", NULL); if (gbName) gbf = lineFileOpen(gbName, TRUE); sif = lineFileOpen(argv[1], TRUE); asf = mustOpen(argv[2], "r"); dsf = lineFileOpen(argv[3], TRUE); daf = lineFileOpen(argv[4], TRUE); dff = mustOpen(argv[5], "r"); safef(filename, ArraySize(filename), "%s.info", argv[6]); of = mustOpen(filename, "w"); safef(filename, ArraySize(filename), "%s.primers", argv[6]); opf = mustOpen(filename, "w"); safef(filename, ArraySize(filename), "%s.alias", argv[6]); oaf = mustOpen(filename, "w"); safef(filename, ArraySize(filename), "%s.fa", argv[6]); off = mustOpen(filename, "w"); /* Read in current stsInfo file */ verbose(1, "Reading current stsInfo file: %s\n", argv[1]); readStsInfo(sif); /* Read in genbank accessions that have sequences */ if (gbName) { verbose(1, "Reading genbank accession file: %s\n", gbName); readGbAcc(gbf); } /* Read in primer and organism information from dbSTS.sts */ verbose(1, "Reading current dbSTS.sts file: %s\n", argv[3]); readDbstsPrimers(dsf); /* Read in names from dbSTS.alias and create new stsInfo records if needed */ verbose(1, "Reading current dbSTS.aliases file: %s\n", argv[4]); readDbstsNames(daf); /* Read in current sequences for sts markers */ verbose(1, "Reading current all.STS file: %s\n", argv[2]); readAllSts(asf); /* Read in new sequences from dbSTS.fa */ verbose(1, "Reading dbSTS.fa file: %s\n", argv[5]); readDbstsFa(dff); /* Print out the new files */ verbose(1, "Creating output files: %s .info .primers .alias .fa\n", argv[6]); writeOut(of, opf, oaf, 
off); fclose(asf); lineFileClose(&dsf); lineFileClose(&daf); fclose(dff); if (gbName) lineFileClose(&gbf); fclose(of); fclose(opf); fclose(oaf); fclose(off); return(0); }
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable) /* hgExpDistance - Create table that measures expression distance between pairs. */ { struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char query[256]; char **row; struct hash *expHash = hashNew(16); int realExpCount = -1; struct microData *gene; int rc, t; pthread_t *threads = NULL; pthread_attr_t attr; int *threadID = NULL; void *status; char *tempDir = "."; int arrayNum; struct microDataDistance *geneDistPtr = NULL; struct microDataDistance *geneDistArray = NULL; int geneIx; FILE *f = NULL; /* Get list/hash of all items with expression values. */ safef(query, sizeof(query), "select name,expCount,expScores from %s", posTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; if (!hashLookup(expHash, name)) { int expCount = sqlUnsigned(row[1]); int commaCount; float *expScores = NULL; sqlFloatDynamicArray(row[2], &expScores, &commaCount); if (expCount != commaCount) errAbort("expCount and expScores don't match on %s in %s", name, posTable); if (realExpCount == -1) realExpCount = expCount; if (expCount != realExpCount) errAbort("In %s some rows have %d experiments others %d", name, expCount, realExpCount); AllocVar(gene); gene->expCount = expCount; gene->expScores = expScores; hashAddSaveName(expHash, name, gene, &gene->name); slAddHead(&geneList, gene); } } sqlFreeResult(&sr); conn = sqlConnect(database); slReverse(&geneList); geneCount = slCount(geneList); printf("Have %d elements in %s\n", geneCount, posTable); weights = getWeights(realExpCount); if (optionExists("lookup")) geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList); geneCount = slCount(geneList); printf("Got %d unique elements in %s\n", geneCount, posTable); sqlDisconnect(&conn); /* Disconnect because next step is slow. 
*/ if (geneCount < 1) errAbort("ERROR: unique gene count less than one ?"); f = hgCreateTabFile(tempDir, outTable); synQ = synQueueNew(); /* instantiate threads */ AllocArray( threadID, numThreads ); AllocArray( threads, numThreads ); pthread_attr_init( &attr ); pthread_mutex_init( &mutexDotOut, NULL ); pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE ); for (t = 0; t < numThreads; t++) { threadID[t] = t; rc = pthread_create( &threads[t], &attr, computeDistance, (void *) &threadID[t]); if (rc) errAbort("ERROR: in pthread_create() %d\n", rc ); } /* this thread will write to the file from the queue */ for (arrayNum = 0; arrayNum < geneCount; arrayNum++) { geneDistArray = (struct microDataDistance *)synQueueGet( synQ ); geneDistPtr = geneDistArray; /* Print out closest GENEDISTS distances in tab file. */ for (geneIx=0; geneIx < GENEDISTS && geneIx < geneCount; ++geneIx, geneDistPtr++) if (geneDistPtr != NULL) fprintf(f, "%s\t%s\t%f\n", geneDistPtr->name1, geneDistPtr->name2, geneDistPtr->distance); else errAbort("ERROR: writing distance %d to file\n", geneIx); freeMem( geneDistArray ); } /* synchronize all threads */ for (t = 0; t < numThreads; t++) { rc = pthread_join( threads[t], &status); if (rc) errAbort("ERROR: in pthread_join() %d\n", rc ); } printf("Made %s.tab\n", outTable); slFreeList( &geneList ); pthread_mutex_destroy( &mutexDotOut ); pthread_attr_destroy( &attr ); /* Create and load table. */ conn = sqlConnect(database); distanceTableCreate(conn, outTable); hgLoadTabFile(conn, tempDir, outTable, &f); printf("Loaded %s\n", outTable); /* Add indices. */ safef(query, sizeof(query), "alter table %s add index(query(12))", outTable); sqlUpdate(conn, query); printf("Made query index\n"); if (optionExists("targetIndex")) { safef(query, sizeof(query), "alter table %s add index(target(12))", outTable); sqlUpdate(conn, query); printf("Made target index\n"); } hgRemoveTabFile(tempDir, outTable); }
void pickIntrons() /** Top level routine, actually picks the introns. */ { char *htmlFileName=NULL, *htmlFrameFileName=NULL; char *bedFileName=NULL, *orthoBedFileName=NULL; FILE *htmlOut=NULL, *htmlFrameOut=NULL; FILE *bedOut=NULL, *orthoBedOut=NULL; char *orthoEvalFile = NULL; char *db = NULL; struct orthoEval *ev=NULL, *evList = NULL; struct intronEv *iv=NULL, *ivList = NULL; int maxPicks = optionInt("numPicks", 100); int i=0; boolean isRefSeq=FALSE, isMgcBad=FALSE; struct hash *posHash = newHash(12), *agxHash = newHash(12); struct bed *bed = NULL; char buff[256]; htmlFileName = optionVal("htmlFile", NULL); htmlFrameFileName = optionVal("htmlFrameFile", "frame.html"); orthoEvalFile = optionVal("orthoEvalFile", NULL); db = optionVal("db", NULL); bedFileName = optionVal("bedOutFile", NULL); orthoBedFileName = optionVal("orthoBedOut", NULL); if(htmlFileName == NULL || orthoEvalFile == NULL || db == NULL || bedFileName == NULL || orthoBedFileName == NULL ) errAbort("Missing parameters. Use -help for usage."); warn("Loading orthoEvals."); evList = orthoEvalLoadAll(orthoEvalFile); warn("Creating intron records"); for(ev = evList; ev != NULL; ev = ev->next) { for(i=0; i<ev->numIntrons; i++) { occassionalDot(); iv = intronIvForEv(ev, i); slAddHead(&ivList, iv); } } warn("\nDone"); warn("Sorting"); slSort(&ivList, intronEvalCmp); warn("Done."); htmlOut = mustOpen(htmlFileName, "w"); bedOut = mustOpen(bedFileName, "w"); htmlFrameOut = mustOpen(htmlFrameFileName, "w"); orthoBedOut = mustOpen(orthoBedFileName, "w"); i=0; fprintf(htmlOut, "<html><body><table border=1><tr><th>Num</th><th>Mouse Acc.</th><th>Score</th><th>TS Pick</th></tr>\n"); warn("Filtering"); safef(buff, sizeof(buff), "tmp"); for(iv = ivList; iv != NULL && maxPicks > 0; iv = iv->next) { if(isUniqueCoordAndAgx(db, iv, posHash, agxHash) && iv->support == 0 && !isOverlappedByRefSeq(db, iv) && ! isOverlappedByEst(db, iv) && ! 
isOverlappedByMRna(db, iv)) { boolean twinScan = (coordOverlappedByTable(db, iv->chrom, iv->e1S, iv->e1E, "mgcTSExpPcr") && coordOverlappedByTable(db, iv->chrom, iv->e2S, iv->e2E, "mgcTSExpPcr")); bed = bedForIv(iv); if(sameString(buff, "tmp")) safef(buff, sizeof(buff), "%s:%d-%d", bed->chrom, bed->chromStart-50, bed->chromEnd+50); // isMgcBad = isOverlappedByMgcBad(iv); fprintf(htmlOut, "<tr><td>%d</td><td><a target=\"browser\" " "href=\"http://mgc.cse.ucsc.edu/cgi-bin/hgTracks?db=hg15&position=%s:%d-%d\"> " "%s </a></td><td>%d</td><td>%s</td></tr>\n", ++i,bed->chrom, bed->chromStart-50, bed->chromEnd+50, bed->name, bed->score, twinScan ? "yes" : "no"); bedTabOutN(bed, 12, bedOut); bedTabOutN(iv->ev->orthoBed, 12, orthoBedOut); bedFree(&bed); maxPicks--; } } writeOutFrames(htmlFrameOut, htmlFileName, db, bedFileName, buff); fprintf(htmlOut, "</table></body></html>\n"); carefulClose(&bedOut); carefulClose(&htmlOut); carefulClose(&htmlFrameOut); carefulClose(&orthoBedOut); warn("Done."); hashFree(&posHash); hashFree(&agxHash); }
void outputBedsFromPsls(struct hash *pslHash,char *bedOutName, char *expRecordOutName, char *affyFileName, char *expFileName) /** For each set of entries in affyFile find matching psl and create a bed. */ { struct bed *bed = NULL, *b=NULL; struct psl *pslList = NULL, *psl = NULL; struct hash *expHash = NULL; int numExps = 0; int expCount = 0; int i =0; char *probeSet = NULL; char *row[4]; char key[128]; struct slName *expNames = NULL, *name = NULL; FILE *bedOut = NULL; FILE *expRecordOut = NULL; char *toDiffFileName = optionVal("toDiffFile", NULL); FILE *toDiffOut = NULL; struct lineFile *lf = NULL; fillInExpHash(expFileName, &expHash, &expNames, &expCount); lf = lineFileOpen(affyFileName, TRUE); bedOut = mustOpen(bedOutName, "w"); if(toDiffFileName != NULL) toDiffOut = mustOpen(toDiffFileName, "w"); /* Loop through either adding experiments to beds or if new probeset create bed from psl and start over. */ while(lineFileChopNextTab(lf, row, sizeof(row))) { /* Do we have to make a new bed? */ if(probeSet == NULL || differentWord(probeSet, row[0])) { occassionalDot(); numExps = 0; /* If we have probeset print out the current beds. */ if(probeSet != NULL) { for(b = bed; b != NULL; b = b->next) { int avgCount = 0; for(i = 0; i < b->expCount; i++) if(b->expScores[i] != -10000) avgCount++; if(avgCount != 0 && b->score > 0) b->score = log(b->score / avgCount) * 100; else b->score = 0; bedTabOutN(b, 15, bedOut); if(toDiffOut != NULL) outputToDiffRecord(b, expNames, toDiffOut); } } bedFreeList(&bed); /* Lookup key in pslHash to find list of psl. */ safef(key, sizeof(key), "%s", row[0]); pslList = hashFindVal(pslHash, key); /* Can have multiple psls. */ for(psl = pslList; psl != NULL; psl = psl->next) { b = bedFromPsl(psl); AllocArray(b->expIds, expCount ); AllocArray(b->expScores, expCount); b->expCount = expCount; initBedScores(b, expCount); slAddHead(&bed, b); } } if(bed != NULL) { /* Allocate larger arrays if necessary. 
*/ if(numExps > expCount) { errAbort("Supposed to be %d experiments but probeset %s has at least %d", expCount, bed->name, numExps); } for(b = bed; b != NULL; b = b->next) { int exp = hashIntVal(expHash, row[1]); if(differentWord(row[3], "NaN")) b->expScores[exp] = atof(row[3]); if(differentWord(row[2], "NaN")) b->score += atof(row[2]); } numExps++; } freez(&probeSet); probeSet = cloneString(row[0]); } expRecordOut = mustOpen(expRecordOutName, "w"); i = 0; for(name = expNames; name != NULL; name = name->next) { subChar(name->name, ',', '_'); subChar(name->name, ' ', '_'); fprintf(expRecordOut, "%d\t%s\tuclaExp\tuclaExp\tuclaExp\tuclaExp\t1\t%s,\n", i++, name->name, name->name); } hashFree(&expHash); slFreeList(&expNames); carefulClose(&expRecordOut); carefulClose(&bedOut); lineFileClose(&lf); }
void featureBits(char *database, int tableCount, char *tables[]) /* featureBits - Correlate tables via bitmap projections and booleans. */ { struct sqlConnection *conn = NULL; char *bedName = optionVal("bed", NULL), *faName = optionVal("fa", NULL); char *binName = optionVal("bin", NULL); char *bedRegionInName = optionVal("bedRegionIn", NULL); char *bedRegionOutName = optionVal("bedRegionOut", NULL); FILE *bedFile = NULL, *faFile = NULL, *binFile = NULL; FILE *bedRegionOutFile = NULL; struct bed *bedRegionList = NULL; boolean faIndependent = FALSE; struct chromInfo *cInfo; if (bedName) bedFile = mustOpen(bedName, "w"); if (binName) binFile = mustOpen(binName, "w"); if ((bedRegionInName && !bedRegionOutName) || (!bedRegionInName && bedRegionOutName)) errAbort("bedRegionIn and bedRegionOut must both be specified"); if (faName) { boolean faMerge = optionExists("faMerge"); faFile = mustOpen(faName, "w"); if (tableCount > 1) { if (!faMerge) errAbort("For fa output of multiple tables you must use the " "faMerge option"); } faIndependent = (!faMerge); } if (chromSizes != NULL) chromInfoList = chromInfoLoadAll(chromSizes); else chromInfoList = fbCreateChromInfoList(clChrom, database); if (!countGaps) conn = hAllocConn(database); checkInputExists(conn, database, chromInfoList, tableCount, tables); if (!faIndependent) { double totalBases = 0, totalBits = 0; int firstTableBits = 0, secondTableBits = 0; int *pFirstTableBits = NULL, *pSecondTableBits = NULL; double totalFirstBits = 0, totalSecondBits = 0; static int dotClock = 1; if (calcEnrichment) { pFirstTableBits = &firstTableBits; pSecondTableBits = &secondTableBits; } if (bedRegionInName) { struct lineFile *lf = lineFileOpen(bedRegionInName, TRUE); struct bed *bed; char *row[3]; bedRegionOutFile = mustOpen(bedRegionOutName, "w"); while (lineFileRow(lf, row)) { if (startsWith(row[0],"#")||startsWith(row[0],"chrom")) continue; bed = bedLoad3(row); slAddHead(&bedRegionList, bed); } lineFileClose(&lf); 
slReverse(&bedRegionList); } for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next) { if (inclChrom(cInfo->chrom)) { int chromBitSize; int chromSize = cInfo->size; verbose(3,"chromFeatureBits(%s)\n", cInfo->chrom); chromFeatureBits(conn, database, cInfo->chrom, tableCount, tables, bedFile, faFile, binFile, bedRegionList, bedRegionOutFile, chromSize, &chromBitSize, pFirstTableBits, pSecondTableBits ); totalBases += countBases(conn, cInfo->chrom, chromSize, database); totalBits += chromBitSize; totalFirstBits += firstTableBits; totalSecondBits += secondTableBits; if (dots > 0) { if (--dotClock <= 0) { fputc('.', stdout); fflush(stdout); dotClock = dots; } } } } if (dots > 0) { fputc('\n', stdout); fflush(stdout); } if (calcEnrichment) fprintf(stderr,"%s %5.3f%%, %s %5.3f%%, both %5.3f%%, cover %4.2f%%, enrich %4.2fx\n", tables[0], 100.0 * totalFirstBits/totalBases, tables[1], 100.0 * totalSecondBits/totalBases, 100.0 * totalBits/totalBases, 100.0 * totalBits / totalFirstBits, (totalBits/totalSecondBits) / (totalFirstBits/totalBases) ); else fprintf(stderr,"%1.0f bases of %1.0f (%4.3f%%) in intersection\n", totalBits, totalBases, 100.0*totalBits/totalBases); } else { int totalItems = 0; double totalBases = 0; int itemCount, baseCount; for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next) { if (inclChrom(cInfo->chrom)) { chromFeatureSeq(conn, database, cInfo->chrom, tables[0], bedFile, faFile, &itemCount, &baseCount); totalBases += countBases(conn, cInfo->chrom, baseCount, database); totalItems += itemCount; } } } hFreeConn(&conn); }
int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, optionSpecs); if (optionExists("algoHelp")) prAlgo(); if (argc != 3) usage("wrong # of args"); gLocalNearBest = optionFrac("localNearBest", gLocalNearBest); gGlobalNearBest = optionFrac("globalNearBest", gGlobalNearBest); if ((gLocalNearBest >= 0.0) && (gGlobalNearBest >= 0.0)) errAbort("can only specify one of -localNearBest and -globalNearBest"); if (optionExists("usePolyTHead")) gCDnaOpts |= cDnaUsePolyTHead; if (optionExists("ignoreNs")) gCDnaOpts |= cDnaIgnoreNs; if (optionExists("ignoreIntrons")) gCDnaOpts |= cDnaIgnoreIntrons; if (optionExists("repsAsMatch")) gCDnaOpts |= cDnaRepsAsMatch; gMinId = optionFrac("minId", gMinId); gMinCover = optionFrac("minCover", gMinCover); gMinSpan = optionFrac("minSpan", gMinSpan); gMinQSize = optionInt("minQSize", gMinQSize); gMaxAligns = optionInt("maxAligns", gMaxAligns); gMaxAlignsDrop = optionInt("maxAlignsDrop", gMaxAlignsDrop); if ((gMaxAligns >= 0) && (gMaxAlignsDrop >= 0)) errAbort("cannot specify both -maxAligns and -maxAlignsDrop"); gMinAlnSize = optionInt("minAlnSize", gMinAlnSize); gMinNonRepSize = optionInt("minNonRepSize", gMinNonRepSize); gMaxRepMatch = optionFrac("maxRepMatch", gMaxRepMatch); gPolyASizes = optionVal("polyASizes", NULL); if (optionExists("usePolyTHead") && (gPolyASizes == NULL)) errAbort("must specify -polyASizes with -usePolyTHead"); gHapRegions = optionVal("hapRegions", NULL); gBestOverlap = optionExists("bestOverlap"); gDropped = optionVal("dropped", NULL); gWeirdOverlappped = optionVal("weirdOverlapped", NULL); gFilterWeirdOverlapped = optionExists("filterWeirdOverlapped"); gHapRefMapped = optionVal("hapRefMapped", NULL); gHapRefCDnaAlns = optionVal("hapRefCDnaAlns", NULL); gHapLociAlns = optionVal("hapLociAlns", NULL); if (optionExists("noValidate")) gValidate = FALSE; cDnaAlignsAlnIdQNameMode = optionExists("alnIdQNameMode"); if (optionExists("ignoreNs")) gCDnaOpts |= cDnaIgnoreNs; gUniqueMapped = 
optionExists("uniqueMapped"); gDecayMinCover = optionExists("decayMinCover"); char *blackList = optionVal("blackList", NULL); if (blackList != NULL) gBlackListRanges = genbankBlackListParse(blackList); if ( gDecayMinCover && (gMinCover > 0.0)) errAbort("can only specify one of -minCoverage and -decayMinCoverage"); pslCDnaFilter(argv[1], argv[2]); return 0; }
int main(int argc, char *argv[]) /* Process command line. */ { boolean drop, move, copy; struct sigaction sigSpec; setlinebuf(stdout); setlinebuf(stderr); ZeroVar(&sigSpec); sigSpec.sa_handler = sigStopSignaled; sigSpec.sa_flags = SA_RESTART; if (sigaction(SIGUSR1, &sigSpec, NULL) < 0) errnoAbort("can't set SIGUSR1 handler"); optionInit(&argc, argv, optionSpecs); drop = optionExists("drop"); move = optionExists("move"); copy = optionExists("copy"); gReload = optionExists("reload"); if (move || copy) { if (argc != 3) usage(); } else if (argc != 2) usage(); if ((drop+move+copy) > 1) errAbort("can only specify one of -drop, -move, or -copy"); gbVerbInit(optionInt("verbose", 0)); if (gbVerbose >= 6) sqlMonitorEnable(JKSQL_TRACE); if (drop) dropAll(argv[1]); else if (move) moveAll(argv[1], argv[2]); else if (copy) copyAll(argv[1], argv[2]); else { char *reloadList = optionVal("reloadList", NULL); gDatabase = argv[1]; gOptions = dbLoadOptionsParse(gDatabase); gForceIgnoreDelete = optionExists("forceIgnoreDelete"); if (optionExists("rebuildDerived")) gOptions.flags |= DBLOAD_BYPASS_GBLOADED|DBLOAD_REBUILD_DERIVED; gMaxShrinkage = optionFloat("maxShrinkage", 0.1); gGbdbGenBank = optionVal("gbdbGenBank", NULL); if (gGbdbGenBank == NULL) gGbdbGenBank = gbConfGet(gOptions.conf, "gbdb.genbank"); if (gGbdbGenBank == NULL) gGbdbGenBank = "/gbdb/genbank"; gWorkDir = optionVal("workdir", "work/load"); if (gOptions.flags & DBLOAD_DRY_RUN) printf("*** using dry run mode ***\n"); gbLoadRna(reloadList); } return 0; }
int main(int argc, char *argv[]) /* Process command line. */ { char *db, *cdsDb, *cdsFile, *pslSpec, *genePredFile; int optCnt; optionInit(&argc, argv, optionSpecs); if (argc != 3) usage(); pslSpec = argv[1]; genePredFile = argv[2]; db = optionVal("db", NULL); cdsDb = optionVal("cdsDb", NULL); cdsFile = optionVal("cdsFile", NULL); gRequireUtr = optionExists("requireUtr"); if (optionExists("cdsMergeMod3") && !optionExists("cdsMergeSize")) errAbort("must specify -cdsMergeSize with -cdsMergeMod3"); if (optionExists("cdsMergeSize") || optionExists("utrMergeSize")) { gCdsMergeSize = optionInt("cdsMergeSize", -1); gUtrMergeSize = optionInt("utrMergeSize", -1); if (optionExists("cdsMergeMod3")) gPslOptions |= genePredPslCdsMod3; if (optionExists("smallInsertSize") || optionExists("insertMergeSize")) errAbort("can't specify -smallInsertSize or -insertMergeSize with -cdsMergeSize or -utrMergeSize"); } else { int insertMergeSize = genePredStdInsertMergeSize; if (optionExists("smallInsertSize")) insertMergeSize = optionInt("smallInsertSize", genePredStdInsertMergeSize); insertMergeSize = optionInt("insertMergeSize", genePredStdInsertMergeSize); gCdsMergeSize = gUtrMergeSize = insertMergeSize; } gGenePredExt = optionExists("genePredExt"); gKeepInvalid = optionExists("keepInvalid"); gAllCds = optionExists("allCds"); gNoCds = optionExists("noCds"); gQuiet = optionExists("quiet"); gIgnoreUniqSuffix = optionExists("ignoreUniqSuffix"); if ((gAllCds || gNoCds) && ((cdsDb != NULL) || (cdsFile != NULL))) errAbort("can't specify -allCds or -noCds with -cdsDb or -cdsFile"); if (gAllCds && gRequireUtr) errAbort("can't specify -allCds with -requireUtr"); /* this is a bit of work to implement */ if ((gAllCds || gNoCds) && (db != NULL)) errAbort("can't specify -allCds or -noCds with -db"); optCnt = 0; if (db != NULL) optCnt++; if (cdsDb == NULL) optCnt++; if (cdsFile != NULL) optCnt++; if (gAllCds) optCnt++; if (gNoCds) optCnt++; if (optCnt == 1) errAbort("must specify one and only one of 
-db, -cdsDb, -cdsFile, -allCds, or -noCds"); mrnaToGene(db, cdsDb, cdsFile, pslSpec, genePredFile); return 0; }
int main(int argc, char *argv[]) /* Process command line. */ { char *chromInfo; optionInit(&argc, argv, optionSpecs); if (argc < 4) usage(); noBin = optionExists("noBin") || optionExists("nobin"); noSort = optionExists("noSort"); strictTab = optionExists("tab"); oldTable = optionExists("oldTable"); sqlTable = optionVal("sqlTable", sqlTable); renameSqlTable = optionExists("renameSqlTable"); trimSqlTable = optionExists("trimSqlTable"); as = optionVal("as", as); type = optionVal("type", type); hasBin = optionExists("hasBin"); noLoad = optionExists("noLoad"); noHistory = optionExists("noHistory"); bedGraph = optionInt("bedGraph",0); bedDetail = optionExists("bedDetail"); minScore = optionInt("minScore",100); if (minScore<0 || minScore>1000) errAbort("minScore must be between 0-1000\n"); notItemRgb = optionExists("notItemRgb"); if (notItemRgb) itemRgb = FALSE; maxChromNameLength = optionInt("maxChromNameLength",0); dotIsNull = optionInt("dotIsNull",dotIsNull); noStrict = optionExists("noStrict") || optionExists("nostrict"); allowStartEqualEnd = optionExists("allowStartEqualEnd"); tmpDir = optionVal("tmpDir", tmpDir); nameIx = ! 
optionExists("noNameIx"); ignoreEmpty = optionExists("ignoreEmpty"); allowNegativeScores = optionExists("allowNegativeScores"); customTrackLoader = optionExists("customTrackLoader"); parseType(); /* turns on: noNameIx, ignoreEmpty, allowStartEqualEnd, allowNegativeScores * -verbose=0 */ if (customTrackLoader) { type = NULL; /* because customTrack/Factory has already validated the input */ ignoreEmpty = TRUE; noHistory = TRUE; nameIx = FALSE; allowStartEqualEnd = TRUE; allowNegativeScores = TRUE; verboseSetLevel(0); expireSeconds = 1200; /* 20 minutes */ (void) signal(SIGALRM, selfApoptosis); (void) alarm(expireSeconds); /* CGI timeout */ } fillInScoreColumn = optionVal("fillInScore", NULL); chromInfo=optionVal("chromInfo", NULL); if (chromInfo) { if (!type) errAbort("Only use chromInfo with type for validate"); // Get chromInfo from file chrHash = chromHashFromFile(chromInfo); } else if (type) { // Get chromInfo from DB chrHash = chromHashFromDatabase(argv[1]); } hgLoadBed(argv[1], argv[2], argc-3, argv+3); return 0; }
int main(int argc, char *argv[]) { struct lineFile *pf, *ef, *apf; FILE *of, *nf, *enf=NULL; char *efName=NULL, filename[256]; int verb = 0; verboseSetLevel(0); optionInit(&argc, argv, optionSpecs); if (argc < 3) { fprintf(stderr, "USAGE: pslAnal [-epcr=<file> -verbose=<level>] <isPCR psl file> <all.primers> <outfile>\n"); return 1; } verb = optionInt("verbose", 0); verboseSetLevel(verb); efName = optionVal("epcr", NULL); pf = pslFileOpen(argv[1]); apf = lineFileOpen(argv[2], TRUE); of = mustOpen(argv[3], "w"); sprintf(filename, "%s.notfound.primers", argv[3]); nf = mustOpen(filename, "w"); verbose(1, "Reading all.primers file\n"); readPrimerInfo(apf); if (efName) { ef = lineFileOpen(efName, TRUE); verbose(1, "Reading epcr file\n"); readEpcr(ef); } verbose(1, "Reading and processing isPCR file\n"); processPrimers(pf, of); if (efName) { verbose(1, "Writing epcr.not.found file\n"); sprintf(filename, "epcr.not.found"); enf = mustOpen(filename, "w"); writeEpcrNotFound(enf); } verbose(1, "Writing out primers not found\n"); writePrimersNotFound(nf); if (efName) { lineFileClose(&ef); fclose(enf); } lineFileClose(&pf); lineFileClose(&apf); fclose(of); fclose(nf); return(0); }
int main(int argc, char *argv[]) /* Process command line. */ { struct sqlConnection *conn = NULL; char *command = NULL; optionInit(&argc, argv, options); database = optionVal("database", database); sqlPath = optionVal("sqlPath", sqlPath); if (argc < 2) usage(); command = argv[1]; if (argc >= 3) setCurrentDir(argv[2]); conn = sqlConnect(database); if (sameWord(command,"INIT")) { if (argc != 2) usage(); errAbort("INIT is probably too dangerous. DO NOT USE."); /* init(conn); */ } else if (sameWord(command,"POP")) { if (argc != 2) usage(); /* populate vgPrb where missing */ populateMissingVgPrb(conn); } else if (sameWord(command,"SEQ")) { if (argc != 4) usage(); /* make fake probe sequences */ makeFakeProbeSeq(conn,argv[3]); } else if (sameWord(command,"ALI")) { if (argc != 4) usage(); /* blat anything left that is not aligned, nor even attempted */ doAlignments(conn,argv[3]); } else if (sameWord(command,"EXT")) { if (argc != 4) usage(); /* update seq and extfile as necessary */ doSeqAndExtFile(conn,argv[3],"vgProbes"); } else if (sameWord(command,"PSLMAP")) { if (argc != 5) usage(); /* pslMap anything left that is not aligned, nor even attempted */ doAlignmentsPslMap(conn,argv[3],argv[4]); } else if (sameWord(command,"REMAP")) { if (argc != 7) usage(); /* re-map anything in track specified that is not aligned, nor even attempted yet, using specified fasta file. */ doAlignmentsReMap(conn,argv[3],argv[4],argv[5],argv[6]); } else if (sameWord(command,"SELFMAP")) { if (argc != 4) usage(); /* re-map anything in track specified that is not aligned, nor even attempted yet, using specified fasta file. */ doAlignmentsSelfMap(conn,argv[3]); } else if (sameWord(command,"EXTALL")) { if (argc != 4) usage(); /* update seq and extfile as necessary */ doSeqAndExtFile(conn,argv[3],"vgAllProbes"); } else usage(); sqlDisconnect(&conn); return 0; }
int main(int argc, char *argv[]) /* Process command line. */ { int i; char *cp; unsigned long long reversed; size_t maxAlloc; char asciiAlloc[32]; optionInit(&argc, argv, options); if (argc < 2) usage(); maxAlloc = 2100000000 * (((sizeof(size_t)/4)*(sizeof(size_t)/4)*(sizeof(size_t)/4))); sprintLongWithCommas(asciiAlloc, (long long) maxAlloc); verbose(4, "#\tmaxAlloc: %s\n", asciiAlloc); setMaxAlloc(maxAlloc); /* produces: size_t is 4 == 2100000000 ~= 2^31 = 2Gb * size_t is 8 = 16800000000 ~= 2^34 = 16 Gb */ dnaUtilOpen(); motif = optionVal("motif", NULL); chr = optionVal("chr", NULL); strand = optionVal("strand", NULL); bedOutput = optionExists("bedOutput"); wigOutput = optionExists("wigOutput"); if (wigOutput) bedOutput = FALSE; else bedOutput = TRUE; if (chr) verbose(2, "#\tprocessing chr: %s\n", chr); if (strand) verbose(2, "#\tprocessing strand: '%s'\n", strand); if (motif) verbose(2, "#\tsearching for motif: %s\n", motif); else { warn("ERROR: -motif string empty, please specify a motif\n"); usage(); } verbose(2, "#\ttype output: %s\n", wigOutput ? "wiggle data" : "bed format"); verbose(2, "#\tspecified sequence: %s\n", argv[1]); verbose(2, "#\tsizeof(motifVal): %d\n", (int)sizeof(motifVal)); if (strand) { if (! 
(sameString(strand,"+") | sameString(strand,"-"))) { warn("ERROR: -strand specified ('%s') is not + or - ?\n", strand); usage(); } /* They are both on by default, turn off the one not specified */ if (sameString(strand,"-")) doPlusStrand = FALSE; if (sameString(strand,"+")) doMinusStrand = FALSE; } motifLen = strlen(motif); /* at two bits per character, size limit of motif is * number of bits in motifVal / 2 */ if (motifLen > (4*sizeof(motifVal))/2 ) { warn("ERROR: motif string too long, limit %d\n", (4*(int)sizeof(motifVal))/2 ); usage(); } cp = motif; motifVal = 0; complementVal = 0; for (i = 0; i < motifLen; ++i) { switch (*cp) { case 'a': case 'A': motifVal = (motifVal << 2) | A_BASE_VAL; complementVal = (complementVal << 2) | T_BASE_VAL; break; case 'c': case 'C': motifVal = (motifVal << 2) | C_BASE_VAL; complementVal = (complementVal << 2) | G_BASE_VAL; break; case 'g': case 'G': motifVal = (motifVal << 2) | G_BASE_VAL; complementVal = (complementVal << 2) | C_BASE_VAL; break; case 't': case 'T': motifVal = (motifVal << 2) | T_BASE_VAL; complementVal = (complementVal << 2) | A_BASE_VAL; break; default: warn( "ERROR: character in motif: '%c' is not one of ACGT\n", *cp); usage(); } ++cp; } reversed = 0; for (i = 0; i < motifLen; ++i) { int base; base = complementVal & 3; reversed = (reversed << 2) | base; complementVal >>= 2; } complementVal = reversed; verbose(2, "#\tmotif numerical value: %llu (%#llx)\n", motifVal, motifVal); verbose(2, "#\tcomplement numerical value: %llu (%#llx)\n", complementVal, complementVal); if (motifLen < 5) { warn("ERROR: motif string must be more than 4 characters\n"); usage(); } findMotif(argv[1]); return 0; }
/* entry */ int main(int argc, char** argv) { char *selectFile, *inFile, *outFile, *dropFile; optionInit(&argc, argv, optionSpecs); if (argc != 4) usage("wrong # args"); selectFile = argv[1]; inFile = argv[2]; outFile = argv[3]; /* select file options */ if (optionExists("selectFmt") && optionExists("selectCoordCols")) errAbort("can't specify both -selectFmt and -selectCoordCols"); if (optionExists("selectFmt")) selectFmt = parseFormatSpec(optionVal("selectFmt", NULL)); else if (optionExists("selectCoordCols")) { selectCoordCols = coordColsParseSpec("selectCoordCols", optionVal("selectCoordCols", NULL)); selectFmt = COORD_COLS_FMT; } else selectFmt = getFileFormat(selectFile); if (optionExists("selectCds")) selectCaOpts |= chromAnnCds; if (optionExists("selectRange")) selectCaOpts |= chromAnnRange; if ((selectFmt == PSLQ_FMT) || (selectFmt == CHAINQ_FMT)) selectCaOpts |= chromAnnUseQSide; /* in file options */ if (optionExists("inFmt") && optionExists("inCoordCols")) errAbort("can't specify both -inFmt and -inCoordCols"); if (optionExists("inFmt")) inFmt = parseFormatSpec(optionVal("inFmt", NULL)); else if (optionExists("inCoordCols")) { inCoordCols = coordColsParseSpec("inCoordCols", optionVal("inCoordCols", NULL)); inFmt = COORD_COLS_FMT; } else inFmt = getFileFormat(inFile); inCaOpts = chromAnnSaveLines; // need lines for output if (optionExists("inCds")) inCaOpts |= chromAnnCds; if (optionExists("inRange")) inCaOpts |= chromAnnRange; if ((inFmt == PSLQ_FMT) || (inFmt == CHAINQ_FMT)) inCaOpts |= chromAnnUseQSide; /* select options */ useAggregate = optionExists("aggregate"); nonOverlapping = optionExists("nonOverlapping"); if (optionExists("strand") && optionExists("oppositeStrand")) errAbort("can only specify one of -strand and -oppositeStrand"); if (optionExists("strand")) selectOpts |= selStrand; if (optionExists("oppositeStrand")) selectOpts |= selOppositeStrand; if (optionExists("excludeSelf") && (optionExists("idMatch"))) errAbort("can't specify both 
-excludeSelf and -idMatch"); if (optionExists("excludeSelf")) selectOpts |= selExcludeSelf; if (optionExists("idMatch")) selectOpts |= selIdMatch; criteria.threshold = optionFloat("overlapThreshold", 0.0); criteria.thresholdCeil = optionFloat("overlapThresholdCeil", 1.1); criteria.similarity = optionFloat("overlapSimilarity", 0.0); criteria.similarityCeil = optionFloat("overlapSimilarityCeil", 1.1); criteria.bases = optionInt("overlapBases", -1); /* output options */ mergeOutput = optionExists("mergeOutput"); idOutput = optionExists("idOutput"); statsOutput = optionExists("statsOutput") || optionExists("statsOutputAll") || optionExists("statsOutputBoth"); if ((mergeOutput + idOutput + statsOutput) > 1) errAbort("can only specify one of -mergeOutput, -idOutput, -statsOutput, -statsOutputAll, or -statsOutputBoth"); outputAll = optionExists("statsOutputAll"); outputBoth = optionExists("statsOutputBoth"); if (outputBoth) outputAll = TRUE; if (mergeOutput) { if (nonOverlapping) errAbort("can't use -mergeOutput with -nonOverlapping"); if (useAggregate) errAbort("can't use -mergeOutput with -aggregate"); if ((selectFmt == CHAIN_FMT) || (selectFmt == CHAINQ_FMT) || (inFmt == CHAIN_FMT) || (inFmt == CHAINQ_FMT)) if (useAggregate) errAbort("can't use -mergeOutput with chains"); selectCaOpts |= chromAnnSaveLines; } dropFile = optionVal("dropped", NULL); /* check for options incompatible with aggregate mode */ if (useAggregate) { int i; for (i = 0; aggIncompatible[i] != NULL; i++) { if (optionExists(aggIncompatible[i])) errAbort("-%s is not allowed -aggregate", aggIncompatible[i]); } } overlapSelect(selectFile, inFile, outFile, dropFile); return 0; }
int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (optionExists("help")) { printHelp(); } if (argc != 3) { usage(); } outMdb = optionVal("outMdb", outMdb); onlyCompTdb = optionExists("onlyCompTdb"); release = optionVal("release", release); releaseNum = optionInt("releaseNum", releaseNum); char *database = argv[1]; char *composite = argv[2]; char defaultMetaDb[1024]; char defaultDownloadDir[1024]; char tempDownloadDir[1024]; char *src = getSrcDir(); char *org = cloneString(hOrganism(database)); org[0] = tolower(org[0]); /* If user doesn't provide a metaDB, assume the path using the database and composite */ safef(defaultMetaDb, sizeof(defaultMetaDb), "%s/hg/makeDb/trackDb/%s/%s/metaDb/%s/%s.ra", src, org, database, release, composite); /* If user doesn't provide a downloadDir, assume the path using the database and composite */ safef(defaultDownloadDir, sizeof(defaultDownloadDir), "/usr/local/apache/htdocs-hgdownload/goldenPath/%s/encodeDCC/%s", database, composite); safef(tempDownloadDir, sizeof(tempDownloadDir), "/usr/local/apache/htdocs-hgdownload/goldenPath/%s/encodeDCC/%s", database, composite); if (releaseNum) safef(defaultDownloadDir, sizeof(defaultDownloadDir), "%s/release%d", tempDownloadDir, releaseNum); /* If user doesn't provide a trackDB, assume the path using the database and composite */ char defaultTrackDb[1024]; /* Load encode composite-includer trackDb.wgEncode.ra */ char trackDbIncluder[1024]; safef(trackDbIncluder, sizeof(trackDbIncluder), "%s/hg/makeDb/trackDb/%s/%s/%s", src, org, database, "trackDb.wgEncode.ra"); struct raFile *includerFile = raFileRead(trackDbIncluder); /* Find the correct trackDb.ra for the composite */ int numTagsFound = -1; char *compositeName = findCompositeRa(includerFile, composite, release, &numTagsFound); if (!compositeName) errAbort("unable to find composite .ra for the track in trackDb.wgEncode.ra\n"); // if numTagsFound == 1 then a composite .ra with a single alpha 
tag exists already, // so no further work required on trackDb.wgEncode.ra safef(defaultTrackDb, sizeof(defaultTrackDb), "%s/hg/makeDb/trackDb/%s/%s/%s", src, org, database, compositeName); verbose(1,"database: %s\ncomposite: %s\nrelease %s\ndefault trackDb: %s\ndefault metaDb: %s\ndefault downloadDir: %s\n", database, composite, release, defaultTrackDb, defaultMetaDb, defaultDownloadDir); char *metaDb = optionVal("metaDb",defaultMetaDb); replaceTildeWithHome(&metaDb); if (!fileExists(metaDb)) errAbort("metaDb %s does not exist.", metaDb); char *trackDb = optionVal("trackDb",defaultTrackDb); replaceTildeWithHome(&trackDb); if (!fileExists(trackDb)) errAbort("trackDb %s does not exist.", trackDb); char *downloadDir = optionVal("downloadDir",defaultDownloadDir); replaceTildeWithHome(&downloadDir); if (!fileExists(downloadDir)) errAbort("downloadDir %s does not exist.", downloadDir); printf("metaDb = %s\n trackDb = %s\n downloadDir = %s\n",metaDb,trackDb,downloadDir); metaCheck(database, composite, metaDb, trackDb, downloadDir); return 0; }
int main(int argc, char *argv[]) { struct lineFile *pf, *ef, *apf; FILE *of, *nf, *enf=NULL; char *efName=NULL, filename[256], notFound[256]; int verb = 0; verboseSetLevel(0); optionInit(&argc, argv, optionSpecs); if (argc < 3) { verbose(0, "usage: pslFilterPrimers [-epcr=<file> -verbose=<level>] <isPCR psl file> <all.primers> <outfile>\n"); return 1; } verb = optionInt("verbose", 0); verboseSetLevel(verb); efName = optionVal("epcr", NULL); pf = pslFileOpen(argv[1]); apf = lineFileOpen(argv[2], TRUE); of = mustOpen(argv[3], "w"); safef(notFound, sizeof(filename), "%s.notfound.primers", argv[3]); nf = mustOpen(notFound, "w"); verbose(1, "Reading all primers file: '%s'\n", argv[2]); readPrimerInfo(apf); if (efName) { ef = lineFileOpen(efName, TRUE); verbose(1, "Reading epcr file: '%s'\n", efName); readEpcr(ef); } verbose(1, "Reading isPCR file: '%s' processing output to: '%s'\n", argv[1], argv[3]); processPrimers(pf, of); if (efName) { safef(filename, sizeof(filename), "epcr.not.found"); verbose(1, "Writing %s file\n", filename); enf = mustOpen(filename, "w"); writeEpcrNotFound(enf); } verbose(1, "Writing primers not found to file: '%s'\n", notFound); writePrimersNotFound(nf); if (efName) { lineFileClose(&ef); fclose(enf); } lineFileClose(&pf); lineFileClose(&apf); fclose(of); fclose(nf); return(0); }
void splitByGap(char *inName, int pieceSize, char *outRoot, long long estSize) /* Split up file into pieces at most pieceSize bases long, at gap boundaries * if possible. */ { off_t pieces = (estSize + pieceSize-1)/pieceSize; int digits = digitsBaseTen(pieces); int minGapSize = optionInt("minGapSize", 1000); boolean noGapDrops = optionExists("noGapDrops"); int maxN = optionInt("maxN", pieceSize-1); boolean oneFile = optionExists("oneFile"); char fileName[512]; char dirOnly[256], noPath[128]; int pos, pieceIx = 0, writeCount = 0; struct dnaSeq seq; struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = NULL; Bits *bits = NULL; int seqCount = 0; char *outFile = optionVal("out", NULL); char *liftFile = optionVal("lift", NULL); FILE *lift = NULL; ZeroVar(&seq); if (minGapSize < 1) errAbort("ERROR: minGapSize must be > 0"); splitPath(outRoot, dirOnly, noPath, NULL); if (oneFile) { sprintf(fileName, "%s.fa", outRoot); f = mustOpen(fileName, "w"); } else fileName[0] = '\0'; if (liftFile) lift = mustOpen(liftFile, "w"); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { bits = bitAlloc(seq.size); setBitsN(seq.dna, seq.size, bits); ++seqCount; if (outFile != NULL) { if (seqCount > 1) errAbort("Can only handle in files with one sequence using out option"); bitsForOut(outFile, seq.size, bits); } pos = 0; while (pos < seq.size) { boolean gotGap = FALSE; int gapStart = 0; int gapSize = 0; int endSize = seq.size - pos; int thisSize = min(endSize, pieceSize); int startGapLen = 0; if (seq.dna[pos] == 'n' || seq.dna[pos] == 'N') { startGapLen = bitFindClear(bits, pos, endSize) - pos; verbose(3,"#\tstarting gap at %d for length: %d\n", pos, startGapLen ); } /* if a block is all gap for longer than minGapSize, then * keep it all together in one large piece */ if (startGapLen > minGapSize) { if (noGapDrops) { writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, startGapLen, &seq, lift, &writeCount, fileName); } else verbose(3,"#\tbeginning gap of %d 
size skipped\n", startGapLen); thisSize = startGapLen; } else if (thisSize > 0 && bitCountRange(bits, pos, thisSize) <= maxN) { if (endSize>pieceSize) /* otherwise chops tiny piece at very end */ { gotGap = findLastGap(&(seq.dna[pos]), thisSize, endSize, minGapSize, &gapStart, &gapSize); if (gotGap) thisSize = gapStart; } writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, thisSize, &seq, lift, &writeCount, fileName); } pos += thisSize; if (gotGap) { /* last block is all gap, write it all out */ /*if ((pos + gapSize) >= seq.size)*/ if (noGapDrops) { writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, gapSize, &seq ,lift, &writeCount, fileName); verbose(3, "#\tadding gapSize %d to pos %d -> %d and writing gap\n", gapSize, pos, pos+gapSize); } else verbose(3,"#\tadding gapSize %d to pos %d -> %d\n", gapSize, pos, pos+gapSize); pos += gapSize; } } bitFree(&bits); } carefulClose(&f); carefulClose(&lift); lineFileClose(&lf); printf("%d pieces of %d written\n", writeCount, pieceIx); }
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 *
 * doLoad   - when FALSE only the sorted tab file is written; when TRUE the
 *            table is (re)created and loaded, the binary .cgb file is
 *            written and the metaChromGraph row for track is replaced.
 * db       - database to load into.
 * track    - table name, also the base name of the tab and .cgb files.
 * fileName - input file; read through chromGraphListWithTable when the
 *            -idTable option is given, chromGraphLoadAll otherwise.
 *
 * Aborts when the input is empty.  With -minusLog10 every value is
 * replaced by 0 when it equals 1, by -log10(value) when positive, and is
 * left alone otherwise, before the min/max are taken. */
{
    double minVal,maxVal;
    struct chromGraph *el, *list;
    FILE *f;
    char *tempDir = ".";
    char path[PATH_LEN], gbdbPath[PATH_LEN];
    char *idTable = optionVal("idTable", NULL);
    char *pathPrefix = NULL;
    boolean minusLog10 = optionExists("minusLog10");

    if (idTable == NULL)
        list = chromGraphLoadAll(fileName);
    else
        list = chromGraphListWithTable(fileName, db, idTable);
    if (list == NULL)
        errAbort("%s is empty", fileName);

    /* Apply the optional transform to EVERY element, including the first.
     * The first element used to be skipped, which also seeded min/max with
     * an untransformed value. */
    if (minusLog10)
    {
        for (el = list; el != NULL; el = el->next)
        {
            if (el->val == 1)
                el->val = 0;
            else if (el->val > 0)
                el->val = -1 * log(el->val)/log(10);
        }
    }

    /* Figure out min/max values */
    minVal = maxVal = list->val;
    for (el = list->next; el != NULL; el = el->next)
    {
        if (el->val < minVal)
            minVal = el->val;
        if (el->val > maxVal)
            maxVal = el->val;
    }

    /* Sort and write out temp file. */
    slSort(&list, chromGraphCmp);
    f = hgCreateTabFile(tempDir, track);
    for (el = list; el != NULL; el = el->next)
        chromGraphTabOut(el, f);

    if (doLoad)
    {
        struct dyString *dy = dyStringNew(0);
        struct sqlConnection *conn;

        /* Set up connection to database and create main table. */
        conn = hAllocConn(db);
        sqlDyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
        sqlRemakeTable(conn, track, dy->string);

        /* Load main table and clean up file handle. */
        hgLoadTabFile(conn, tempDir, track, &f);
        hgRemoveTabFile(tempDir, track);

        /* If need be create meta table.  If need be delete old row. */
        if (!sqlTableExists(conn, "metaChromGraph"))
            sqlUpdate(conn, metaCreateString);
        else
        {
            dyStringClear(dy);
            sqlDyStringPrintf(dy, "delete from metaChromGraph where name = '%s'", track);
            sqlUpdate(conn, dy->string);
        }

        /* Make chrom graph file */
        safef(path, sizeof(path), "%s.cgb", track);
        chromGraphToBin(list, path);
        /* path is reused as the default gbdb prefix for the meta row. */
        safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
        pathPrefix = optionVal("pathPrefix", path);
        safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

        /* Create new line in meta table */
        dyStringClear(dy);
        sqlDyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
                          track, minVal, maxVal, gbdbPath);
        sqlUpdate(conn, dy->string);

        /* Release what this branch acquired. */
        dyStringFree(&dy);
        hFreeConn(&conn);
    }

    /* Closes the tab file on the no-load path; a no-op when f is NULL. */
    carefulClose(&f);
}
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable) /* hgExpDistance - Create table that measures expression distance between pairs. */ { struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char query[256]; char **row; struct hash *expHash = hashNew(16); int realExpCount = -1; struct microData *geneList = NULL, *curGene, *gene; int geneIx, geneCount = 0; struct microData **geneArray = NULL; float *weights = NULL; char *tempDir = "."; FILE *f = hgCreateTabFile(tempDir, outTable); long time1, time2; time1 = clock1000(); /* Get list/hash of all items with expression values. */ /* uglyf("warning: temporarily limited to 1000 records\n"); */ sqlSafef(query, sizeof(query), "select name,expCount,expScores from %s", posTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; if (!hashLookup(expHash, name)) { int expCount = sqlUnsigned(row[1]); int commaCount; float *expScores = NULL; sqlFloatDynamicArray(row[2], &expScores, &commaCount); if (expCount != commaCount) errAbort("expCount and expScores don't match on %s in %s", name, posTable); if (realExpCount == -1) realExpCount = expCount; if (expCount != realExpCount) errAbort("In %s some rows have %d experiments others %d", name, expCount, realExpCount); AllocVar(gene); gene->expCount = expCount; gene->expScores = expScores; hashAddSaveName(expHash, name, gene, &gene->name); slAddHead(&geneList, gene); } } sqlFreeResult(&sr); conn = sqlConnect(database); slReverse(&geneList); geneCount = slCount(geneList); printf("Have %d elements in %s\n", geneCount, posTable); weights = getWeights(realExpCount); if (optionExists("lookup")) geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList); geneCount = slCount(geneList); printf("Got %d unique elements in %s\n", geneCount, posTable); sqlDisconnect(&conn); /* Disconnect because next step is slow. 
*/ if (geneCount < 1) errAbort("ERROR: unique gene count less than one ?"); time2 = clock1000(); verbose(2, "records read time: %.2f seconds\n", (time2 - time1) / 1000.0); /* Get an array for sorting. */ AllocArray(geneArray, geneCount); for (gene = geneList,geneIx=0; gene != NULL; gene = gene->next, ++geneIx) geneArray[geneIx] = gene; /* Print out closest 1000 in tab file. */ for (curGene = geneList; curGene != NULL; curGene = curGene->next) { calcDistances(curGene, geneList, weights); qsort(geneArray, geneCount, sizeof(geneArray[0]), cmpMicroDataDistance); for (geneIx=0; geneIx < 1000 && geneIx < geneCount; ++geneIx) { gene = geneArray[geneIx]; fprintf(f, "%s\t%s\t%f\n", curGene->name, gene->name, gene->distance); } dotOut(); } printf("Made %s.tab\n", outTable); time1 = time2; time2 = clock1000(); verbose(2, "distance computation time: %.2f seconds\n", (time2 - time1) / 1000.0); /* Create and load table. */ conn = sqlConnect(database); distanceTableCreate(conn, outTable); hgLoadTabFile(conn, tempDir, outTable, &f); printf("Loaded %s\n", outTable); /* Add indices. */ sqlSafef(query, sizeof(query), "alter table %s add index(query(12))", outTable); sqlUpdate(conn, query); printf("Made query index\n"); if (optionExists("targetIndex")) { sqlSafef(query, sizeof(query), "alter table %s add index(target(12))", outTable); sqlUpdate(conn, query); printf("Made target index\n"); } hgRemoveTabFile(tempDir, outTable); time1 = time2; time2 = clock1000(); verbose(2, "table create/load/index time: %.2f seconds\n", (time2 - time1) / 1000.0); }
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable) /* hgExpDistance - Create table that measures expression distance between pairs. */ { struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char query[256]; char **row; struct hash *expHash = hashNew(16); int realExpCount = -1; struct microData *gene; int rc, t; pthread_t *threads = NULL; pthread_attr_t attr; int *threadID = NULL; void *status; char *tempDir = "."; long time1, time2; time1 = clock1000(); /* Get list/hash of all items with expression values. */ sqlSafef(query, sizeof(query), "select name,expCount,expScores from %s", posTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; if (!hashLookup(expHash, name)) { int expCount = sqlUnsigned(row[1]); int commaCount; float *expScores = NULL; sqlFloatDynamicArray(row[2], &expScores, &commaCount); if (expCount != commaCount) errAbort("expCount and expScores don't match on %s in %s", name, posTable); if (realExpCount == -1) realExpCount = expCount; if (expCount != realExpCount) errAbort("In %s some rows have %d experiments others %d", name, expCount, realExpCount); AllocVar(gene); gene->expCount = expCount; gene->expScores = expScores; hashAddSaveName(expHash, name, gene, &gene->name); slAddHead(&geneList, gene); } } sqlFreeResult(&sr); conn = sqlConnect(database); slReverse(&geneList); geneCount = slCount(geneList); printf("Have %d elements in %s\n", geneCount, posTable); weights = getWeights(realExpCount); if (optionExists("lookup")) geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList); geneCount = slCount(geneList); printf("Got %d unique elements in %s\n", geneCount, posTable); sqlDisconnect(&conn); /* Disconnect because next step is slow. 
*/ if (geneCount < 1) errAbort("ERROR: unique gene count less than one ?"); time2 = clock1000(); verbose(2, "records read time: %.2f seconds\n", (time2 - time1) / 1000.0); f = hgCreateTabFile(tempDir, outTable); /* instantiate threads */ AllocArray( threadID, numThreads ); AllocArray( threads, numThreads ); pthread_attr_init( &attr ); pthread_mutex_init( &mutexfilehandle, NULL ); pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE ); for (t = 0; t < numThreads; t++) { threadID[t] = t; rc = pthread_create( &threads[t], &attr, computeDistance, (void *) &threadID[t]); if (rc) errAbort("ERROR: in pthread_create() %d\n", rc ); } /* synchronize all threads */ for (t = 0; t < numThreads; t++) { rc = pthread_join( threads[t], &status); if (rc) errAbort("ERROR: in pthread_join() %d\n", rc ); } printf("Made %s.tab\n", outTable); slFreeList( &geneList ); pthread_mutex_destroy( &mutexfilehandle ); pthread_attr_destroy( &attr ); time1 = time2; time2 = clock1000(); verbose(2, "distance computation time: %.2f seconds\n", (time2 - time1) / 1000.0); /* Create and load table. */ conn = sqlConnect(database); distanceTableCreate(conn, outTable); hgLoadTabFile(conn, tempDir, outTable, &f); printf("Loaded %s\n", outTable); /* Add indices. */ sqlSafef(query, sizeof(query), "alter table %s add index(query(12))", outTable); sqlUpdate(conn, query); printf("Made query index\n"); if (optionExists("targetIndex")) { sqlSafef(query, sizeof(query), "alter table %s add index(target(12))", outTable); sqlUpdate(conn, query); printf("Made target index\n"); } hgRemoveTabFile(tempDir, outTable); time1 = time2; time2 = clock1000(); verbose(2, "table create/load/index time: %.2f seconds\n", (time2 - time1) / 1000.0); }
void consForBed() /* Open and read the bed file. Load consFile into an double array for easy access and process. */ { char *bedFileName = NULL; char *chrom = NULL; struct bed *bedList = NULL, *bed = NULL; char *consFileName = NULL; int *consProb = NULL; char *consBedName = NULL; FILE *consBedOut = NULL; char *summaryBedName = NULL; FILE *summaryBedOut = NULL; /* Get the output file names. */ consBedName = optionVal("bedConsOut", NULL); if(consBedName == NULL) errAbort("Must specify an output file for bed conservation."); summaryBedName = optionVal("summary", NULL); /* What chromosome are we on? */ chrom = optionVal("chrom", NULL); if(chrom == NULL) errAbort("Must specify a chromosome."); /* read in the beds. */ warn("Reading in beds."); bedFileName = optionVal("bedFile", NULL); if(bedFileName != NULL) bedList = bedLoadAll(bedFileName); else errAbort("Must specify a bedFile.\n"); /* Read in the conservation scores. */ consFileName = optionVal("consFile", NULL); if(consFileName != NULL) consProb = readInConservationVals(consFileName); else errAbort("Must specify a conservation file."); /* Open output files */ consBedOut = mustOpen(consBedName,"w"); if(summaryBedName != NULL) summaryBedOut = mustOpen(summaryBedName, "w"); /* Process each individual bed. */ warn("Writing out conservation for beds."); for(bed = bedList; bed != NULL; bed = bed->next) { if(differentString(chrom, bed->chrom)) continue; outputBedConservation(bed, consProb, consBedOut, summaryBedOut); } warn("Cleaning up"); carefulClose(&consBedOut); carefulClose(&summaryBedOut); freez(&consProb); warn("Done."); }
void xmfaToMaf(char *in, char *out) /* xmfaToMaf - Convert from xmfa to maf format. */ { int c; FILE *input = mustOpen(in, "r"); FILE *output = mustOpen(out, "w"); char* commentLine; struct dnaSeq* sequence; struct mafAli *ali; struct sqlConnection* conn = hAllocConn(); mafWriteStart(output, "mlagan"); AllocVar(ali); while(myFaReadMixedNext(input, TRUE, "default name", TRUE, &commentLine, &sequence)) { char srcName[128]; c = fgetc(input); if(c == '=' || c == '>') { /* add the current sequence and process the block if we've see an '='*/ char org[32]; char chrom[32]; int start; int stop; char strand; struct mafComp *comp; double score; char buffer[1024]; ungetc(c, input); AllocVar(comp); /* parse the comment line */ sscanf(commentLine, ">%s %[^:]:%d-%d %c", org, chrom, &start, &stop, &strand); /* build the name */ safef(srcName, sizeof(srcName), "%s.%s", optionVal(org, org), chrom); comp->src = cloneString(srcName); sqlSafef(buffer, 1024, "SELECT size FROM %s.chromInfo WHERE chrom = \"%s\"", optionVal(org, org), chrom); assert(sqlQuickQuery(conn, buffer, buffer, 1024) != 0); comp->srcSize = atoi(buffer); comp->strand = strand; start = start - 1; comp->start = start; comp->size = ungappedSize(sequence); if(strand == '-') comp->start = comp->srcSize - (comp->start + comp->size); comp->text = sequence->dna; sequence->dna = 0; slAddHead(&ali->components, comp); freeDnaSeq(&sequence); if(c == '=') { fscanf(input, "= score=%lf\n", &score); ali->score = score; slReverse(&ali->components); mafWrite(output, ali); mafAliFree(&ali); AllocVar(ali); } } } mafWriteEnd(output); }