void pslSort(char *command, char *outFile, char *tempDir, char *inDirs[], int inDirCount) /* Do the two step sort. */ { int i; struct slName *fileList = NULL, *name; char *inDir; struct slName *dirDir, *dirFile; char fileName[512]; int fileCount; int totalFilesProcessed = 0; int filesPerMidFile; int midFileCount = 0; FILE *f; struct lineFile *lf; boolean doReflect = FALSE; boolean suppressSelf = FALSE; boolean firstOnly = endsWith(command, "1"); boolean secondOnly = endsWith(command, "2"); if (startsWith("dirs", command)) ; else if (startsWith("g2g", command)) { doReflect = TRUE; suppressSelf = TRUE; } else usage(); if (!secondOnly) { makeDir(tempDir); /* Figure out how many files to process. */ for (i=0; i<inDirCount; ++i) { inDir = inDirs[i]; dirDir = listDir(inDir, "*.psl"); if (slCount(dirDir) == 0) dirDir = listDir(inDir, "*.psl.gz"); if (slCount(dirDir) == 0) errAbort("No psl files in %s\n", inDir); verbose(1, "%s with %d files\n", inDir, slCount(dirDir)); for (dirFile = dirDir; dirFile != NULL; dirFile = dirFile->next) { sprintf(fileName, "%s/%s", inDir, dirFile->name); name = newSlName(fileName); slAddHead(&fileList, name); } slFreeList(&dirDir); } verbose(1, "%d files in %d dirs\n", slCount(fileList), inDirCount); slReverse(&fileList); fileCount = slCount(fileList); filesPerMidFile = round(sqrt(fileCount)); // if (filesPerMidFile > 20) // filesPerMidFile = 20; /* bandaide! Should keep track of mem usage. */ verbose(1, "Got %d files %d files per mid file\n", fileCount, filesPerMidFile); /* Read in files a group at a time, sort, and write merged, sorted * output of one group. 
*/ name = fileList; while (totalFilesProcessed < fileCount) { int filesInMidFile = 0; struct psl *pslList = NULL, *psl; int lfileCount = 0; struct lm *lm = lmInit(256*1024); for (filesInMidFile = 0; filesInMidFile < filesPerMidFile && name != NULL; ++filesInMidFile, ++totalFilesProcessed, name = name->next) { boolean reflectMe = FALSE; if (doReflect) { reflectMe = !selfFile(name->name); } verbose(2, "Reading %s (%d of %d)\n", name->name, totalFilesProcessed+1, fileCount); lf = pslFileOpen(name->name); while ((psl = nextLmPsl(lf, lm)) != NULL) { if (psl->qStart == psl->tStart && psl->strand[0] == '+' && suppressSelf && sameString(psl->qName, psl->tName)) { continue; } ++lfileCount; slAddHead(&pslList, psl); if (reflectMe) { psl = mirrorLmPsl(psl, lm); slAddHead(&pslList, psl); } } lineFileClose(&lf); } slSort(&pslList, pslCmpQuery); makeMidName(tempDir, midFileCount, fileName); verbose(1, "Writing %s\n", fileName); f = mustOpen(fileName, "w"); if (!nohead) pslWriteHead(f); for (psl = pslList; psl != NULL; psl = psl->next) { pslTabOut(psl, f); } fclose(f); pslList = NULL; lmCleanup(&lm); verbose(2, "lfileCount %d\n", lfileCount); ++midFileCount; } } if (!firstOnly) pslSort2(outFile, tempDir); }
int netConnectHttps(char *hostName, int port)
/* Stand-in for the https connect routine: it always reports through
 * errAbort() that no openssl is available, naming the host and port that
 * were requested. */
{
errAbort("No openssl available in netConnectHttps for %s : %d",
         hostName, port);

/* Not expected to be reached; gives the function a value on every path. */
return -1;
}
void usage()
/* Report the vulgarToPsl purpose and command line through errAbort(). */
{
errAbort(
    "vulgarToPsl - Convert the vulgar exonerate format to PSL.\n"
    "usage:\n"
    " vulgarToPsl input.vul proteinQ.fa dnaT.fa output.psl"
    );
}
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 *   doLoad   - when FALSE only the sorted tab file is written; when TRUE the
 *              table is (re)made and loaded, the .cgb file is written and
 *              the metaChromGraph row for the track is replaced.
 *   db       - database to load into.
 *   track    - table name, also used as the base name of the tab/.cgb files.
 *   fileName - input file; errAborts when it yields no records.
 * Command line options read here: -idTable, -minusLog10, -pathPrefix. */
{
double minVal,maxVal;
struct chromGraph *el, *list;
FILE *f;
char *tempDir = ".";
char path[PATH_LEN], gbdbPath[PATH_LEN];
char *idTable = optionVal("idTable", NULL);
char *pathPrefix = NULL;

if (idTable == NULL)
    list = chromGraphLoadAll(fileName);
else
    list = chromGraphListWithTable(fileName, db, idTable);
if (list == NULL)
    errAbort("%s is empty", fileName);

/* Apply the optional -log10 transform to EVERY element, including the head
 * of the list, before any value is used for the min/max range. */
if (optionExists("minusLog10"))
    {
    for (el = list; el != NULL; el = el->next)
        {
        if (el->val == 1)
            el->val = 0;        /* plain zero instead of the computed -0 */
        else if (el->val > 0)
            el->val = -1 * log(el->val)/log(10);
        /* values <= 0 have no logarithm and are left as they are */
        }
    }

/* Figure out min/max values */
minVal = maxVal = list->val;
for (el = list->next; el != NULL; el = el->next)
    {
    if (el->val < minVal)
        minVal = el->val;
    if (el->val > maxVal)
        maxVal = el->val;
    }

/* Sort and write out temp file. */
slSort(&list, chromGraphCmp);
f = hgCreateTabFile(tempDir, track);
for (el = list; el != NULL; el = el->next)
    chromGraphTabOut(el, f);

if (doLoad)
    {
    struct dyString *dy = dyStringNew(0);
    struct sqlConnection *conn;

    /* Set up connection to database and create main table. */
    conn = hAllocConn(db);
    dyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
    sqlRemakeTable(conn, track, dy->string);

    /* Load main table and clean up file handle. */
    hgLoadTabFile(conn, tempDir, track, &f);
    hgRemoveTabFile(tempDir, track);

    /* If need be create meta table.  If need be delete old row. */
    if (!sqlTableExists(conn, "metaChromGraph"))
        sqlUpdate(conn, metaCreateString);
    else
        {
        dyStringClear(dy);
        dyStringPrintf(dy, "delete from metaChromGraph where name = '%s'", track);
        sqlUpdate(conn, dy->string);
        }

    /* Make chrom graph file */
    safef(path, sizeof(path), "%s.cgb", track);
    chromGraphToBin(list, path);
    safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
    pathPrefix = optionVal("pathPrefix", path);
    safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

    /* Create new line in meta table */
    dyStringClear(dy);
    dyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
        track, minVal, maxVal, gbdbPath);
    sqlUpdate(conn, dy->string);
    }
else
    {
    /* Nothing else will close the tab file on this path; close it here so a
     * failed flush is reported instead of leaving a short file behind. */
    if (fclose(f) != 0)
        errAbort("Error closing tab file for %s", track);
    }
}
/* entry */ int main(int argc, char** argv) { char *selectFile, *inFile, *outFile, *dropFile; optionInit(&argc, argv, optionSpecs); if (argc != 4) usage("wrong # args"); selectFile = argv[1]; inFile = argv[2]; outFile = argv[3]; /* select file options */ if (optionExists("selectFmt") && optionExists("selectCoordCols")) errAbort("can't specify both -selectFmt and -selectCoordCols"); if (optionExists("selectFmt")) selectFmt = parseFormatSpec(optionVal("selectFmt", NULL)); else if (optionExists("selectCoordCols")) { selectCoordCols = coordColsParseSpec("selectCoordCols", optionVal("selectCoordCols", NULL)); selectFmt = COORD_COLS_FMT; } else selectFmt = getFileFormat(selectFile); if (optionExists("selectCds")) selectCaOpts |= chromAnnCds; if (optionExists("selectRange")) selectCaOpts |= chromAnnRange; if ((selectFmt == PSLQ_FMT) || (selectFmt == CHAINQ_FMT)) selectCaOpts |= chromAnnUseQSide; /* in file options */ if (optionExists("inFmt") && optionExists("inCoordCols")) errAbort("can't specify both -inFmt and -inCoordCols"); if (optionExists("inFmt")) inFmt = parseFormatSpec(optionVal("inFmt", NULL)); else if (optionExists("inCoordCols")) { inCoordCols = coordColsParseSpec("inCoordCols", optionVal("inCoordCols", NULL)); inFmt = COORD_COLS_FMT; } else inFmt = getFileFormat(inFile); inCaOpts = chromAnnSaveLines; // need lines for output if (optionExists("inCds")) inCaOpts |= chromAnnCds; if (optionExists("inRange")) inCaOpts |= chromAnnRange; if ((inFmt == PSLQ_FMT) || (inFmt == CHAINQ_FMT)) inCaOpts |= chromAnnUseQSide; /* select options */ useAggregate = optionExists("aggregate"); nonOverlapping = optionExists("nonOverlapping"); if (optionExists("strand") && optionExists("oppositeStrand")) errAbort("can only specify one of -strand and -oppositeStrand"); if (optionExists("strand")) selectOpts |= selStrand; if (optionExists("oppositeStrand")) selectOpts |= selOppositeStrand; if (optionExists("excludeSelf") && (optionExists("idMatch"))) errAbort("can't specify both 
-excludeSelf and -idMatch"); if (optionExists("excludeSelf")) selectOpts |= selExcludeSelf; if (optionExists("idMatch")) selectOpts |= selIdMatch; criteria.threshold = optionFloat("overlapThreshold", 0.0); criteria.thresholdCeil = optionFloat("overlapThresholdCeil", 1.1); criteria.similarity = optionFloat("overlapSimilarity", 0.0); criteria.similarityCeil = optionFloat("overlapSimilarityCeil", 1.1); criteria.bases = optionInt("overlapBases", -1); /* output options */ mergeOutput = optionExists("mergeOutput"); idOutput = optionExists("idOutput"); statsOutput = optionExists("statsOutput") || optionExists("statsOutputAll") || optionExists("statsOutputBoth"); if ((mergeOutput + idOutput + statsOutput) > 1) errAbort("can only specify one of -mergeOutput, -idOutput, -statsOutput, -statsOutputAll, or -statsOutputBoth"); outputAll = optionExists("statsOutputAll"); outputBoth = optionExists("statsOutputBoth"); if (outputBoth) outputAll = TRUE; if (mergeOutput) { if (nonOverlapping) errAbort("can't use -mergeOutput with -nonOverlapping"); if (useAggregate) errAbort("can't use -mergeOutput with -aggregate"); if ((selectFmt == CHAIN_FMT) || (selectFmt == CHAINQ_FMT) || (inFmt == CHAIN_FMT) || (inFmt == CHAINQ_FMT)) if (useAggregate) errAbort("can't use -mergeOutput with chains"); selectCaOpts |= chromAnnSaveLines; } dropFile = optionVal("dropped", NULL); /* check for options incompatible with aggregate mode */ if (useAggregate) { int i; for (i = 0; aggIncompatible[i] != NULL; i++) { if (optionExists(aggIncompatible[i])) errAbort("-%s is not allowed -aggregate", aggIncompatible[i]); } } overlapSelect(selectFile, inFile, outFile, dropFile); return 0; }
static void processMrnaFa(struct sqlConnection *conn, int taxon, char *type, char *db) /* process isPcr results */ { struct dyString *dy = dyStringNew(0); struct lineFile *lf = lineFileOpen("mrna.fa", TRUE); int lineSize; char *line; char *name; char *dna; boolean more = lineFileNext(lf, &line, &lineSize); while(more) { if (line[0] != '>') errAbort("unexpected error out of phase\n"); name = cloneString(line+1); verbose(2,"name=%s\n",name); dyStringClear(dy); while((more=lineFileNext(lf, &line, &lineSize))) { if (line[0] == '>') { break; } dyStringAppend(dy,line); } dna = cloneString(dy->string); while(1) { int oldProbe = 0; dyStringClear(dy); dyStringPrintf(dy, "select id from vgPrb " "where taxon=%d and type='%s' and tName='%s' and state='new'",taxon,type,name); oldProbe = sqlQuickNum(conn,dy->string); if (oldProbe==0) break; /* no more records match */ /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq = '"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " db = '%s',\n", db); dyStringAppend(dy, " state = 'seq'\n"); dyStringPrintf(dy, " where id=%d\n", oldProbe); dyStringPrintf(dy, " and state='%s'\n", "new"); verbose(2, "%s\n", dy->string); sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,oldProbe); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",oldProbe); sqlUpdate(conn, dy->string); } } freez(&name); freez(&dna); } lineFileClose(&lf); dyStringFree(&dy); }
struct annoStreamer *annoStreamDbNew(char *db, char *table, struct annoAssembly *aa, struct asObject *asObj, int maxOutRows) /* Create an annoStreamer (subclass) object from a database table described by asObj. */ { struct sqlConnection *conn = hAllocConn(db); if (!sqlTableExists(conn, table)) errAbort("annoStreamDbNew: table '%s' doesn't exist in database '%s'", table, db); struct annoStreamDb *self = NULL; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); int dbtLen = strlen(db) + strlen(table) + 2; char dbTable[dbtLen]; safef(dbTable, dbtLen, "%s.%s", db, table); annoStreamerInit(streamer, aa, asObj, dbTable); streamer->rowType = arWords; streamer->setRegion = asdSetRegion; streamer->nextRow = asdNextRow; streamer->close = asdClose; self->conn = conn; self->table = cloneString(table); char *asFirstColumnName = streamer->asObj->columnList->name; if (sqlFieldIndex(self->conn, self->table, "bin") == 0) { self->hasBin = 1; self->minFinestBin = binFromRange(0, 1); } if (self->hasBin && !sameString(asFirstColumnName, "bin")) self->omitBin = 1; if (!asdInitBed3Fields(self)) errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as " "{chrom, chromStart, chromEnd}.", db, table); self->makeBaselineQuery = asdMakeBaselineQuery; // When a table has an index on endField, sometimes the query optimizer uses it // and that ruins the sorting. Fortunately most tables don't anymore. self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField); self->notSorted = FALSE; // Special case: genbank-updated tables are not sorted because new mappings are // tacked on at the end. if (isIncrementallyUpdated(table)) self->notSorted = TRUE; self->mergeBins = FALSE; self->maxOutRows = maxOutRows; self->useMaxOutRows = (maxOutRows > 0); self->needQuery = TRUE; self->chromList = annoAssemblySeqNames(aa); if (slCount(self->chromList) > 1000) { // Assembly has many sequences (e.g. 
scaffold-based assembly) -- // don't break up into per-sequence queries. Take our chances // with mysql being unhappy about the sqlResult being open too long. self->doQuery = asdDoQuerySimple; self->nextRowRaw = nextRowFromSqlResult; } else { // All-chromosome assembly -- if table is large, perform a series of // chunked queries. self->doQuery = asdDoQueryChunking; self->nextRowRaw = nextRowFromBuffer; } return (struct annoStreamer *)self; }
struct gapCalc *gapCalcRead(struct lineFile *lf) /* Create gapCalc from open file. */ { int i, tableSize, startLong = -1; struct gapCalc *gapCalc; int *gapInitPos; double *gapInitQGap; double *gapInitTGap; double *gapInitBothGap; AllocVar(gapCalc); /* Parse file. */ readTaggedNumLine(lf, "tableSize", 1, &tableSize, NULL); readTaggedNumLine(lf, "smallSize", 1, &gapCalc->smallSize, NULL); AllocArray(gapInitPos,tableSize); AllocArray(gapInitQGap,tableSize); AllocArray(gapInitTGap,tableSize); AllocArray(gapInitBothGap,tableSize); readTaggedNumLine(lf, "position", tableSize, gapInitPos, NULL); readTaggedNumLine(lf, "qGap", tableSize, NULL, gapInitQGap); readTaggedNumLine(lf, "tGap", tableSize, NULL, gapInitTGap); readTaggedNumLine(lf, "bothGap", tableSize, NULL, gapInitBothGap); /* Set up precomputed interpolations for small gaps. */ AllocArray(gapCalc->qSmall, gapCalc->smallSize); AllocArray(gapCalc->tSmall, gapCalc->smallSize); AllocArray(gapCalc->bSmall, gapCalc->smallSize); for (i=1; i<gapCalc->smallSize; ++i) { gapCalc->qSmall[i] = interpolate(i, gapInitPos, gapInitQGap, tableSize); gapCalc->tSmall[i] = interpolate(i, gapInitPos, gapInitTGap, tableSize); gapCalc->bSmall[i] = interpolate(i, gapInitPos, gapInitBothGap, tableSize); } /* Set up to handle intermediate values. 
*/ for (i=0; i<tableSize; ++i) { if (gapCalc->smallSize == gapInitPos[i]) { startLong = i; break; } } if (startLong < 0) errAbort("No position %d in gapCalcRead()\n", gapCalc->smallSize); gapCalc->longCount = tableSize - startLong; gapCalc->qPosCount = tableSize - startLong; gapCalc->tPosCount = tableSize - startLong; gapCalc->bPosCount = tableSize - startLong; gapCalc->longPos = cloneMem(gapInitPos + startLong, gapCalc->longCount * sizeof(int)); gapCalc->qLong = cloneMem(gapInitQGap + startLong, gapCalc->qPosCount * sizeof(double)); gapCalc->tLong = cloneMem(gapInitTGap + startLong, gapCalc->tPosCount * sizeof(double)); gapCalc->bLong = cloneMem(gapInitBothGap + startLong, gapCalc->bPosCount * sizeof(double)); /* Set up to handle huge values. */ gapCalc->qLastPos = gapCalc->longPos[gapCalc->qPosCount-1]; gapCalc->tLastPos = gapCalc->longPos[gapCalc->tPosCount-1]; gapCalc->bLastPos = gapCalc->longPos[gapCalc->bPosCount-1]; gapCalc->qLastPosVal = gapCalc->qLong[gapCalc->qPosCount-1]; gapCalc->tLastPosVal = gapCalc->tLong[gapCalc->tPosCount-1]; gapCalc->bLastPosVal = gapCalc->bLong[gapCalc->bPosCount-1]; gapCalc->qLastSlope = calcSlope(gapCalc->qLastPosVal, gapCalc->qLong[gapCalc->qPosCount-2], gapCalc->qLastPos, gapCalc->longPos[gapCalc->qPosCount-2]); gapCalc->tLastSlope = calcSlope(gapCalc->tLastPosVal, gapCalc->tLong[gapCalc->tPosCount-2], gapCalc->tLastPos, gapCalc->longPos[gapCalc->tPosCount-2]); gapCalc->bLastSlope = calcSlope(gapCalc->bLastPosVal, gapCalc->bLong[gapCalc->bPosCount-2], gapCalc->bLastPos, gapCalc->longPos[gapCalc->bPosCount-2]); freez(&gapInitPos); freez(&gapInitQGap); freez(&gapInitTGap); freez(&gapInitBothGap); return gapCalc; }
void loadOneBed(struct lineFile *lf, int bedSize, struct bedStub **pList) /* Load one bed file. Make sure all lines have the correct number of fields. * Put results in *pList. */ { char *words[64], *line, *dupe; int wordCount; struct bedStub *bed; struct asObject *asObj = getAsObj(bedSize); int fieldCount = getFieldCount(bedSize, asObj); struct bed *validateBed; AllocVar(validateBed); verbose(1, "Reading %s\n", lf->fileName); while (lineFileNextReal(lf, &line)) { if (hasBin) nextWord(&line); dupe = cloneString(line); if (strictTab) wordCount = chopTabs(line, words); else wordCount = chopLine(line, words); /* ignore empty lines */ if (0 == wordCount) continue; lineFileExpectWords(lf, fieldCount, wordCount); if (type) // TODO also, may need to add a flag to the validateBed() interface to support -allowNegativeScores when not isCt // although can probably get away without it since usually -allowNegativeScores is used by ct which has already verified it. // thus -allowNegativeScores is unlikely to be used with -type. { loadAndValidateBed(words, typeBedN, fieldCount, lf, validateBed, asObj, FALSE); checkChromNameAndSize(lf, validateBed->chrom, validateBed->chromEnd); } AllocVar(bed); bed->chrom = cloneString(words[0]); bed->chromStart = lineFileNeedNum(lf, words, 1); bed->chromEnd = lineFileNeedNum(lf, words, 2); if (! noStrict) { if ((bed->chromEnd < 1) && !allowStartEqualEnd) errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n", lf->lineIx, dupe); if (bed->chromStart == bed->chromEnd && !allowStartEqualEnd) errAbort("ERROR: line %d:'%s'\nchromStart == chromEnd (%d) (zero-length item)\n" "Use -allowStartEqualEnd if that is legit (e.g. for insertion point).\n", lf->lineIx, dupe, bed->chromStart); if (bed->chromStart > bed->chromEnd) errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n", lf->lineIx, dupe, bed->chromStart, bed->chromEnd); } bed->line = dupe; slAddHead(pList, bed); } if (asObj) asObjectFreeList(&asObj); freez(&validateBed); }
struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName)
/* Open up big wig or big bed file.
 *   fileName - file to open; opened through udcFileOpen().
 *   sig      - magic number expected at the start of the file.
 *   typeName - file type name used in the error message.
 * errAborts when the leading 32-bit word matches sig neither as read nor
 * byte-swapped.  Returns the new bbiFile with its header fields, zoom level
 * list and chromosome B+ tree attached. */
{
/* Maintenance note carried over from the original author (JK): this reader
 * has to agree with the code in bigBedFileCreate and bigWigFileCreate.  A
 * refactoring to share code between those two, feeding input one chromosome
 * at a time and the zoom data only after all chromosomes, was considered as
 * a way to cut the memory footprint by a factor of 2 or 4. */
struct bbiFile *bbi;
AllocVar(bbi);
bbi->fileName = cloneString(fileName);
struct udcFile *udc = udcFileOpen(fileName, udcDefaultDir());
bbi->udc = udc;

/* The magic number tells us both whether this is the right kind of file and
 * whether the remaining fields need byte swapping. */
bits32 magic;
udcMustRead(udc, &magic, sizeof(magic));
boolean isSwapped = (magic != sig);
if (isSwapped)
    {
    magic = byteSwap32(magic);
    if (magic != sig)
        errAbort("%s is not a %s file", fileName, typeName);
    }
bbi->typeSig = sig;
bbi->isSwapped = isSwapped;

/* Fixed header fields, read in file order. */
bbi->version = udcReadBits16(udc, isSwapped);
bbi->zoomLevels = udcReadBits16(udc, isSwapped);
bbi->chromTreeOffset = udcReadBits64(udc, isSwapped);
bbi->unzoomedDataOffset = udcReadBits64(udc, isSwapped);
bbi->unzoomedIndexOffset = udcReadBits64(udc, isSwapped);
bbi->fieldCount = udcReadBits16(udc, isSwapped);
bbi->definedFieldCount = udcReadBits16(udc, isSwapped);
bbi->asOffset = udcReadBits64(udc, isSwapped);
bbi->totalSummaryOffset = udcReadBits64(udc, isSwapped);
bbi->uncompressBufSize = udcReadBits32(udc, isSwapped);

/* Skip over reserved area. */
udcSeek(udc, 64);

/* One zoom header per level; collected head-first, then reversed so the
 * list is in file order. */
struct bbiZoomLevel *zoomList = NULL;
int levelsLeft = bbi->zoomLevels;
while (levelsLeft-- > 0)
    {
    struct bbiZoomLevel *zoom;
    AllocVar(zoom);
    zoom->reductionLevel = udcReadBits32(udc, isSwapped);
    zoom->reserved = udcReadBits32(udc, isSwapped);
    zoom->dataOffset = udcReadBits64(udc, isSwapped);
    zoom->indexOffset = udcReadBits64(udc, isSwapped);
    slAddHead(&zoomList, zoom);
    }
slReverse(&zoomList);
bbi->levelList = zoomList;

/* Attach B+ tree of chromosome names and ids. */
udcSeek(udc, bbi->chromTreeOffset);
bbi->chromBpt = bptFileAttach(fileName, udc);

return bbi;
}
boolean asCompareObjs(char *name1, struct asObject *as1, char *name2, struct asObject *as2,
                      int numColumnsToCheck, int *retNumColumnsSame, boolean abortOnDifference)
/* Compare the first numColumnsToCheck columns of as1 and as2 (called name1
 * and name2 in messages) by name, isSizeLink, isList, isArray, low-level
 * type name, fixedSize and linkedSizeName, after first comparing the
 * objects' isTable and isSimple flags.
 * The first difference found is reported through verbose(), at level 1 when
 * abortOnDifference is set and level 2 otherwise.  With abortOnDifference a
 * difference ends in errAbort; otherwise the return is TRUE when no
 * difference was found.  It is also an errAbort when either object is not a
 * table, or when the columns run out before numColumnsToCheck without a
 * difference.  If retNumColumnsSame is not NULL it receives the number of
 * leading columns that matched. */
{
boolean differencesFound = FALSE;
struct asColumn *col1, *col2;
int checkCount = 0;
int verboseLevel = abortOnDifference ? 1 : 2;

if (as1->isTable != as2->isTable)
    {
    verbose(verboseLevel,"isTable does not match: %s=[%d] %s=[%d]",
            name1, as1->isTable, name2, as2->isTable);
    differencesFound = TRUE;
    }
else if (as1->isSimple != as2->isSimple)
    {
    verbose(verboseLevel,"isSimple does not match: %s=[%d] %s=[%d]",
            name1, as1->isSimple, name2, as2->isSimple);
    differencesFound = TRUE;
    }
else
    {
    if (!as1->isTable)
        {
        errAbort("asCompareObjLists only supports Table .as objects at this time.");
        }

    col1 = as1->columnList;
    col2 = as2->columnList;
    while (col1 != NULL && col2 != NULL && checkCount < numColumnsToCheck)
        {
        /* Assume a mismatch; only the final else clears it. */
        boolean columnDiffers = TRUE;

        if (!sameOk(col1->name, col2->name))
            verbose(verboseLevel,"column #%d names do not match: %s=[%s] %s=[%s]\n"
                    , checkCount+1, name1, col1->name, name2, col2->name);
        else if (col1->isSizeLink != col2->isSizeLink)
            verbose(verboseLevel,"column #%d isSizeLink do not match: %s=[%d] %s=[%d]\n"
                    , checkCount+1, name1, col1->isSizeLink, name2, col2->isSizeLink);
        else if (col1->isList != col2->isList)
            verbose(verboseLevel,"column #%d isList do not match: %s=[%d] %s=[%d]\n"
                    , checkCount+1, name1, col1->isList, name2, col2->isList);
        else if (col1->isArray != col2->isArray)
            verbose(verboseLevel,"column #%d isArray do not match: %s=[%d] %s=[%d]\n"
                    , checkCount+1, name1, col1->isArray, name2, col2->isArray);
        else if (!sameOk(col1->lowType->name, col2->lowType->name))
            verbose(verboseLevel,"column #%d type names do not match: %s=[%s] %s=[%s]\n"
                    , checkCount+1, name1, col1->lowType->name, name2, col2->lowType->name);
        else if (col1->fixedSize != col2->fixedSize)
            verbose(verboseLevel,"column #%d fixedSize do not match: %s=[%d] %s=[%d]\n"
                    , checkCount+1, name1, col1->fixedSize, name2, col2->fixedSize);
        else if (!sameOk(col1->linkedSizeName, col2->linkedSizeName))
            verbose(verboseLevel,"column #%d linkedSizeName do not match: %s=[%s] %s=[%s]\n"
                    , checkCount+1, name1, col1->linkedSizeName, name2, col2->linkedSizeName);
        else
            columnDiffers = FALSE;

        if (columnDiffers)
            {
            /* Stop without counting the column that differed. */
            differencesFound = TRUE;
            break;
            }

        col1 = col1->next;
        col2 = col2->next;
        ++checkCount;
        }

    if (!differencesFound && checkCount < numColumnsToCheck)
        errAbort("Unexpected error in asCompareObjLists: asked to compare %d columns in %s and %s, but only found %d in one or both asObjects."
                 , numColumnsToCheck, name1, name2, checkCount);
    }

if (differencesFound)
    {
    if (abortOnDifference)
        errAbort("asObjects differ.");
    else
        verbose(verboseLevel,"asObjects differ. Matching field count=%d\n", checkCount);
    }

if (retNumColumnsSame)
    *retNumColumnsSame = checkCount;
return (!differencesFound);
}
void makeConfigFromEncodeList(char *input, char *output) /* create config file for hgBedsToBedExps from tab-separated file of format * <relDate> <fileName> <fileSize> <submitDate> <metadata> */ { FILE *f = mustOpen(output, "w"); struct lineFile *lf = lineFileOpen(input, TRUE); char *line; while (lineFileNextReal(lf, &line)) { /* Parse out line into major components. */ char *releaseDate = nextWord(&line); char *fileName = nextWord(&line); char *fileSize = nextWord(&line); char *submitDate = nextWord(&line); char *metadata = trimSpaces(line); if (isEmpty(metadata)) errAbort("line %d of %s is truncated", lf->lineIx, lf->fileName); verbose(2, "releaseDate=%s; fileName=%s; fileSize=%s; submitDate=%s; %s\n", releaseDate, fileName, fileSize, submitDate, metadata); /* Loop through metadata looking for cell and antibody. Metadata * is in format this=that; that=two words; that=whatever */ char *cell = NULL, *antibody = NULL; for (;;) { /* Find terminating semicolon if any replace it with zero, and * note position for next time around loop. */ metadata = skipLeadingSpaces(metadata); if (isEmpty(metadata)) break; char *semi = strchr(metadata, ';'); if (semi != NULL) *semi++ = 0; /* Parse out name/value pair. */ char *name = metadata; char *value = strchr(metadata, '='); if (value == NULL) errAbort("Missing '=' in metadata after tag %s in line %d of %s", name, lf->lineIx, lf->fileName); *value++ = 0; name = trimSpaces(name); value = trimSpaces(value); /* Look for our tags. 
*/ if (sameString(name, "cell")) cell = value; else if (sameString(name, "antibody")) antibody = value; metadata = semi; } if (cell == NULL) errAbort("No cell in metadata line %d of %s", lf->lineIx, lf->fileName); if (antibody == NULL) errAbort("No antibody in metadata line %d of %s", lf->lineIx, lf->fileName); fprintf(f, "%s\t%s\t", antibody, cell); fprintf(f, "%s\t", cellAbbreviation(cell)); fprintf(f, "file\t%d\t", scoreCol-1); fprintf(f, "%g", calcNormScoreFactor(fileName, scoreCol-1)); fprintf(f, "\t%s\n", fileName); } carefulClose(&f); }
void fakeFinContigs(char *agpName, char *faName, char *finDir, char *rootName, char *finFaDir, char *ooVer) /* fakeFinContigs - Fake up contigs for a finished chromosome. */ { struct contig *contigList = NULL, *contig = NULL; struct agpFrag *agp; struct lineFile *lf = lineFileOpen(agpName, TRUE); char *line, *words[16]; int lineSize, wordCount; int contigIx = 0; char liftDir[512], contigDir[512], path[512]; char chrom[128]; FILE *f; struct dnaSeq *seq; int fragIx; /* Build up contig list by scanning agp file. */ printf("Reading %s\n", lf->fileName); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == '#' || line[0] == 0) continue; wordCount = chopLine(line, words); if (wordCount < 5) errAbort("Expecting at least 5 words line %d of %s", lf->lineIx, lf->fileName); if (words[4][0] == 'N' || words[4][0] == 'U') { contig = NULL; continue; } lineFileExpectWords(lf, 9, wordCount); agp = agpFragLoad(words); // file is 1-based but agpFragLoad() now assumes 0-based: agp->chromStart -= 1; agp->fragStart -= 1; if (contig == NULL) { AllocVar(contig); sprintf(contig->name, "%s%d", rootName, ++contigIx); contig->startOffset = agp->chromStart; slAddHead(&contigList, contig); } else { if (contig->agpList != NULL && contig->agpList->chromEnd != agp->chromStart) errAbort("Start doesn't match previous end line %d of %s", lf->lineIx, lf->fileName); } if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart) errAbort("Chrom and frag size mismatch line %d of %s", lf->lineIx, lf->fileName); slAddHead(&contig->agpList, agp); contig->endOffset = agp->chromEnd; } slReverse(&contigList); for (contig = contigList; contig != NULL; contig = contig->next) slReverse(&contig->agpList); lineFileClose(&lf); /* Load up chromosome sequence and make sure it is in one piece. */ printf("Reading %s\n", faName); seq = faReadAllDna(faName); if (slCount(seq) != 1) errAbort("Got %d sequences in %s, can only handle one.", slCount(seq), faName); /* Fix up agp coordinates. 
Make a directory for each contig. Fill it with * .fa .agp barge.NN files for that contig. */ printf("Writing contig dirs\n"); for (contig = contigList; contig != NULL; contig = contig->next) { /* Make Contig dir. */ sprintf(contigDir, "%s/%s", finDir, contig->name); makeDir(contigDir); /* Make contig.agp file. */ sprintf(path, "%s/%s.agp", contigDir, contig->name); f = mustOpen(path, "w"); fragIx = 0; for (agp = contig->agpList; agp != NULL; agp = agp->next) { char buf[128]; sprintf(buf, "%s/%s", skipChr(agp->chrom), contig->name); freez(&agp->chrom); agp->chrom = cloneString(buf); agp->chromStart -= contig->startOffset; agp->chromEnd -= contig->startOffset; agp->ix = ++fragIx; agpFragTabOut(agp, f); } carefulClose(&f); /* Make ooGreedy.NN.gl file */ sprintf(path, "%s/%s.%s.gl", contigDir, "ooGreedy", ooVer); f = mustOpen(path, "w"); for (agp = contig->agpList; agp != NULL; agp = agp->next) { if (agp->type[0] != 'N' && agp->type[0] != 'U') { fprintf(f, "%s_1\t%d\t%d\t%s\n", agp->frag, agp->chromStart, agp->chromEnd, agp->strand); } } carefulClose(&f); /* Make contig.fa file. */ sprintf(path, "%s/%s.fa", contigDir, contig->name); faWrite(path, contig->name, seq->dna + contig->startOffset, contig->endOffset - contig->startOffset); /* Make contig/barge file. */ sprintf(path, "%s/barge.%s", contigDir, ooVer); f = mustOpen(path, "w"); fprintf(f, "Barge (Connected Clone) File ooGreedy Version %s\n", ooVer); fprintf(f, "\n"); fprintf(f, "start accession size overlap maxClone maxOverlap\n"); fprintf(f, "------------------------------------------------------------\n"); for (agp = contig->agpList; agp != NULL; agp = agp->next) { char clone[128]; strcpy(clone, agp->frag); chopSuffix(clone); fprintf(f, "%d\t%s\t%d\t100\tn/a\t0\n", agp->chromStart, clone, agp->chromEnd); } carefulClose(&f); /* Make contig/gold file. 
*/ sprintf(path, "%s/gold.%s", contigDir, ooVer); f = mustOpen(path, "w"); fragIx = 0; for (agp = contig->agpList; agp != NULL; agp = agp->next) { char fragName[128]; struct agpFrag frag = *agp; sprintf(fragName, "%s_1", agp->frag); frag.frag = fragName; frag.type[0] = '0'; agpFragTabOut(&frag, f); } carefulClose(&f); } /* Create lift subdirectory. */ printf("Creating lift files\n"); sprintf(liftDir, "%s/lift", finDir); makeDir(liftDir); /* Create lift/oOut.lst file (just a list of contigs). */ sprintf(path, "%s/oOut.lst", liftDir); f = mustOpen(path, "w"); for (contig = contigList; contig != NULL; contig = contig->next) fprintf(f, "%s/%s.fa.out\n", contig->name, contig->name); carefulClose(&f); /* Create lift/ordered.lst file (just a list of contigs). */ sprintf(path, "%s/ordered.lst", liftDir); f = mustOpen(path, "w"); for (contig = contigList; contig != NULL; contig = contig->next) fprintf(f, "%s\n", contig->name); carefulClose(&f); /* Create lift/ordered.lft file. */ sprintf(path, "%s/ordered.lft", liftDir); f = mustOpen(path, "w"); splitPath(faName, NULL, chrom, NULL); for (contig = contigList; contig != NULL; contig = contig->next) fprintf(f, "%d\t%s/%s\t%d\t%s\t%d\n", contig->startOffset, skipChr(chrom), contig->name, contig->endOffset - contig->startOffset, chrom, seq->size); carefulClose(&f); }
void printMimeInfo(struct mimePart *mp, FILE *out, int level)
/* Write a description of mp to out, indenting every line by level spaces.
 * The content-disposition, content-type and content-transfer-encoding
 * headers are shown when present; size, binary flag, file name and data are
 * shown only for parts that have a content-disposition header.  A part with
 * neither data nor a file name must be a multipart, whose children are
 * printed recursively one level deeper; anything else is an errAbort. */
{
char *cd = NULL, *ct = NULL, *ce = NULL;
char *cdMain = NULL, *cdName = NULL, *cdFileName = NULL;
char *margin = needMem(level+1);
int col;

/* Build the indent string. */
for (col = 0; col < level; ++col)
    margin[col] = ' ';
margin[level] = 0;

cd = hashFindVal(mp->hdr,"content-disposition");
ct = hashFindVal(mp->hdr,"content-type");
ce = hashFindVal(mp->hdr,"content-transfer-encoding");

if (cd)
    {
    fprintf(out,"%scontent-disposition: %s\n",margin,cd);
    cdMain=getMimeHeaderMainVal(cd);
    cdName=getMimeHeaderFieldVal(cd,"name");
    /* NOTE(review): unlike cdFileName below, cdMain and cdName are printed
     * without a NULL check -- confirm the helpers cannot return NULL here. */
    fprintf(out,"%smain:[%s]\n",margin,cdMain);
    fprintf(out,"%sname:[%s]\n",margin,cdName);
    cdFileName=getMimeHeaderFieldVal(cd,"filename");
    if (cdFileName)
        fprintf(out,"%sfilename:[%s]\n",margin,cdFileName);
    }
if (ct)
    fprintf(out,"%scontent-type: %s\n",margin,ct);
if (ce)
    fprintf(out,"%scontent-transer-encoding: %s\n",margin,ce);

if (cd)
    {
    char *shownData;

    fprintf(out,"%ssize:[%llu]\n",margin,(unsigned long long) mp->size);
    if (mp->binary)
        fprintf(out,"%sbinary (contains zeros)\n",margin);
    if (mp->fileName)
        fprintf(out,"%sfileName=[%s]\n",margin, mp->fileName);
    /* Binary data is replaced by a placeholder instead of being printed. */
    if (mp->binary && mp->data)
        shownData = "<binary data not safe to print>";
    else
        shownData = mp->data;
    fprintf(out,"%sdata:[%s]\n",margin, shownData);
    fprintf(out,"\n");
    }

/* Parts carrying data or a file name need nothing further. */
if (!mp->data && !mp->fileName)
    {
    if (mp->multi)
        {
        struct mimePart *child;
        fprintf(out,"%snested MIME structure\n\n",margin);
        for (child = mp->multi; child; child = child->next)
            printMimeInfo(child, out, level+1);
        }
    else
        {
        errAbort("mp-> type not data,fileName, or multi - unexpected MIME structure");
        }
    }

freez(&cdMain);
freez(&cdName);
freez(&cdFileName);
freez(&margin);
}
int main(int argc, char *argv[]) /* Process command line. */ { struct sqlConnection *conn = NULL; char *command = NULL; optionInit(&argc, argv, options); database = optionVal("database", database); sqlPath = optionVal("sqlPath", sqlPath); if (argc < 2) usage(); command = argv[1]; if (argc >= 3) setCurrentDir(argv[2]); conn = sqlConnect(database); if (sameWord(command,"INIT")) { if (argc != 2) usage(); errAbort("INIT is probably too dangerous. DO NOT USE."); /* init(conn); */ } else if (sameWord(command,"POP")) { if (argc != 2) usage(); /* populate vgPrb where missing */ populateMissingVgPrb(conn); } else if (sameWord(command,"SEQ")) { if (argc != 4) usage(); /* make fake probe sequences */ makeFakeProbeSeq(conn,argv[3]); } else if (sameWord(command,"ALI")) { if (argc != 4) usage(); /* blat anything left that is not aligned, nor even attempted */ doAlignments(conn,argv[3]); } else if (sameWord(command,"EXT")) { if (argc != 4) usage(); /* update seq and extfile as necessary */ doSeqAndExtFile(conn,argv[3],"vgProbes"); } else if (sameWord(command,"PSLMAP")) { if (argc != 5) usage(); /* pslMap anything left that is not aligned, nor even attempted */ doAlignmentsPslMap(conn,argv[3],argv[4]); } else if (sameWord(command,"REMAP")) { if (argc != 7) usage(); /* re-map anything in track specified that is not aligned, nor even attempted yet, using specified fasta file. */ doAlignmentsReMap(conn,argv[3],argv[4],argv[5],argv[6]); } else if (sameWord(command,"SELFMAP")) { if (argc != 4) usage(); /* re-map anything in track specified that is not aligned, nor even attempted yet, using specified fasta file. */ doAlignmentsSelfMap(conn,argv[3]); } else if (sameWord(command,"EXTALL")) { if (argc != 4) usage(); /* update seq and extfile as necessary */ doSeqAndExtFile(conn,argv[3],"vgAllProbes"); } else usage(); sqlDisconnect(&conn); return 0; }
void writeBedTab(char *fileName, struct bedStub *bedList) /* Write out bed list to tab-separated file. */ { struct bedStub *bed; FILE *f = mustOpen(fileName, "w"); char *words[64]; int i, wordCount; for (bed = bedList; bed != NULL; bed = bed->next) { if (!noBin) { // allow for zero-length at start of seq [bin code can't handle 0-0] unsigned end = (bed->chromEnd > 0) ? bed->chromEnd : 1; if (fprintf(f, "%u\t", hFindBin(bed->chromStart, end)) <= 0) writeFailed(fileName); } if (strictTab) wordCount = chopTabs(bed->line, words); else wordCount = chopLine(bed->line, words); for (i=0; i<wordCount; ++i) { /* new definition for old "reserved" field, now itemRgb */ /* and when itemRgb, it is a comma separated string r,g,b */ if (itemRgb && (i == 8)) { char *comma; /* Allow comma separated list of rgb values here */ comma = strchr(words[8], ','); if (comma) { int itemRgb = 0; if (-1 == (itemRgb = bedParseRgb(words[8]))) errAbort("ERROR: expecting r,g,b specification, " "found: '%s'", words[8]); else if (fprintf(f, "%d", itemRgb) <= 0) writeFailed(fileName); verbose(2, "itemRgb: %s, rgb: %#x\n", words[8], itemRgb); } else if (fputs(words[i], f) == EOF) writeFailed(fileName); } else if ((dotIsNull > 0) && (dotIsNull == i) && sameString(words[i],".")) /* If the . was used to represent NULL, replace with -1 in the tables */ { if (fputs("-1", f) == EOF) writeFailed(fileName); } else if (fputs(words[i], f) == EOF) writeFailed(fileName); if (i == wordCount-1) { if (fputc('\n', f) == EOF) writeFailed(fileName); } else if (fputc('\t', f) == EOF) writeFailed(fileName); } } fclose(f); }
static void processIsPcr(struct sqlConnection *conn, int taxon, char *db) /* process isPcr results */ { /* >NM_010919:371+1088 2 718bp CGCGGATCCAAGGACATCTTGGACCTTCCG CCCAAGCTTGCATGTGCTGCAGCGACTGCG */ struct dyString *dy = dyStringNew(0); struct lineFile *lf = lineFileOpen("isPcr.fa", TRUE); int lineSize; char *line; char *name; char *dna; char *word, *end; char *tName; int tStart; int tEnd; char *tStrand; int probeid=0; /* really a vgPrb id */ boolean more = lineFileNext(lf, &line, &lineSize); while(more) { if (line[0] != '>') errAbort("unexpected error out of phase\n"); name = cloneString(line); verbose(1,"name=%s\n",name); dyStringClear(dy); while((more=lineFileNext(lf, &line, &lineSize))) { if (line[0] == '>') { break; } dyStringAppend(dy,line); } dna = cloneString(dy->string); word = name+1; end = strchr(word,':'); tName = cloneStringZ(word,end-word); word = end+1; end = strchr(word,'+'); tStrand = "+"; if (!end) { end = strchr(word,'-'); tStrand = "-"; } tStart = atoi(word); word = end+1; end = strchr(word,' '); tEnd = atoi(word); word = end+1; end = strchr(word,' '); probeid = atoi(word); dyStringClear(dy); dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",probeid); if (sqlQuickNum(conn,dy->string)>0) { /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq='"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " tName='%s',\n", tName); dyStringPrintf(dy, " tStart=%d,\n", tStart); dyStringPrintf(dy, " tEnd=%d,\n", tEnd); dyStringPrintf(dy, " tStrand='%s',\n", tStrand); dyStringPrintf(dy, " db='%s',\n", db); dyStringPrintf(dy, " state='%s'\n", "seq"); dyStringPrintf(dy, " where id=%d\n", probeid); dyStringPrintf(dy, " and state='%s'\n", "new"); verbose(2, "%s\n", dy->string); sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe 
table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,probeid); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",probeid); sqlUpdate(conn, dy->string); } } freez(&tName); freez(&name); freez(&dna); } lineFileClose(&lf); dyStringFree(&dy); }
static void loadDatabase(char *database, char *track, int bedSize, struct bedStub *bedList) /* Load database from bedList. */ { struct sqlConnection *conn; struct dyString *dy = newDyString(1024); char *tab = (char *)NULL; int loadOptions = (optionExists("onServer") ? SQL_TAB_FILE_ON_SERVER : 0); if ( ! noLoad ) conn = sqlConnect(database); if ((char *)NULL != tmpDir) tab = cloneString(rTempName(tmpDir,"loadBed",".tab")); else tab = cloneString("bed.tab"); if (bedDetail && sqlTable == NULL && !customTrackLoader) errAbort("bedDetail format requires sqlTable option"); if (bedDetail && !strictTab) errAbort("bedDetail format must be tab separated"); if (bedDetail && !noBin) noBin = TRUE; /* First make table definition. */ if (sqlTable != NULL && !oldTable) { /* Read from file. */ char *sql, *s; readInGulp(sqlTable, &sql, NULL); /* Chop off end-of-statement semicolon if need be. */ s = strchr(sql, ';'); if (s != NULL) *s = 0; if ( !noLoad ) { if (renameSqlTable) { char *pos = stringIn("CREATE TABLE ", sql); if (pos == NULL) errAbort("Can't find CREATE TABLE in %s\n", sqlTable); char *oldSql = cloneString(sql); nextWord(&pos); nextWord(&pos); char *tableName = nextWord(&pos); sql = replaceChars(oldSql, tableName, track); } verbose(1, "Creating table definition for %s from sql: %s\n", track, sqlTable); // add NOSQLINJ tag sqlDyStringPrintf(dy, "%-s", sql); sqlRemakeTable(conn, track, dy->string); if (!noBin) addBinToEmptyTable(conn, track); adjustSqlTableColumns(conn, track, bedSize); } freez(&sql); } else if (!oldTable) { int minLength; if (noLoad) minLength=6; else if (maxChromNameLength) minLength = maxChromNameLength; else minLength = hGetMinIndexLength(database); verbose(2, "INDEX chrom length: %d\n", minLength); /* Create definition statement. 
*/ verbose(1, "Creating table definition for %s, bedSize: %d\n", track, bedSize); sqlDyStringPrintf(dy, "CREATE TABLE %s (\n", track); if (!noBin) dyStringAppend(dy, " bin smallint unsigned not null,\n"); dyStringAppend(dy, " chrom varchar(255) not null,\n"); dyStringAppend(dy, " chromStart int unsigned not null,\n"); dyStringAppend(dy, " chromEnd int unsigned not null,\n"); if (bedSize >= 4) maybeBedGraph(4, dy, " name varchar(255) not null,\n"); if (bedSize >= 5) { if (allowNegativeScores) maybeBedGraph(5, dy, " score int not null,\n"); else maybeBedGraph(5, dy, " score int unsigned not null,\n"); } if (bedSize >= 6) maybeBedGraph(6, dy, " strand char(1) not null,\n"); if (bedSize >= 7) maybeBedGraph(7, dy, " thickStart int unsigned not null,\n"); if (bedSize >= 8) maybeBedGraph(8, dy, " thickEnd int unsigned not null,\n"); /* As of 2004-11-22 the reserved field is used as itemRgb in code */ if (bedSize >= 9) maybeBedGraph(9, dy, " reserved int unsigned not null,\n"); if (bedSize >= 10) maybeBedGraph(10, dy, " blockCount int unsigned not null,\n"); if (bedSize >= 11) maybeBedGraph(11, dy, " blockSizes longblob not null,\n"); if (bedSize >= 12) maybeBedGraph(12, dy, " chromStarts longblob not null,\n"); if (bedSize >= 13) maybeBedGraph(13, dy, " expCount int unsigned not null,\n"); if (bedSize >= 14) maybeBedGraph(14, dy, " expIds longblob not null,\n"); if (bedSize >= 15) maybeBedGraph(15, dy, " expScores longblob not null,\n"); dyStringAppend(dy, "#Indices\n"); if (nameIx && (bedSize >= 4) && (0 == bedGraph)) dyStringAppend(dy, " INDEX(name(16)),\n"); if (noBin) { dyStringPrintf(dy, " INDEX(chrom(%d),chromStart)\n", minLength); } else { dyStringPrintf(dy, " INDEX(chrom(%d),bin)\n", minLength); } dyStringAppend(dy, ")\n"); if (noLoad) verbose(2,"%s", dy->string); else sqlRemakeTable(conn, track, dy->string); } verbose(1, "Saving %s\n", tab); writeBedTab(tab, bedList); if ( ! 
noLoad ) { verbose(1, "Loading %s\n", database); if (customTrackLoader) sqlLoadTabFile(conn, tab, track, loadOptions|SQL_TAB_FILE_WARN_ON_WARN); else sqlLoadTabFile(conn, tab, track, loadOptions); if (! noHistory) hgHistoryComment(conn, "Add %d element(s) from bed list to %s table", slCount(bedList), track); if(fillInScoreColumn != NULL) { char query[500]; char buf[500]; struct sqlResult *sr; sqlSafef(query, sizeof(query), "select sum(score) from %s", track); if(sqlQuickQuery(conn, query, buf, sizeof(buf))) { unsigned sum = sqlUnsigned(buf); if (!sum) { sqlSafef(query, sizeof(query), "select min(%s), max(%s) from %s", fillInScoreColumn, fillInScoreColumn, track); if ((sr = sqlGetResult(conn, query)) != NULL) { char **row = sqlNextRow(sr); if(row != NULL) { float min = sqlFloat(row[0]); float max = sqlFloat(row[1]); if ( !(max == -1 && min == -1)) // if score is -1 then ignore, as if it werent present { if (max == min || sameString(row[0],row[1])) // this will lead to 'inf' score value in SQL update causing an error errAbort("Could not set score in table %s max(%s)=min(%s)=%s\n", track, fillInScoreColumn, fillInScoreColumn, row[0]); sqlFreeResult(&sr); // Calculate a, b s/t f(x) = ax + b maps min-max => minScore-1000 float a = (1000-minScore) / (max - min); float b = 1000 - ((1000-minScore) * max) / (max - min); sqlSafef(query, sizeof(query), "update %s set score = round((%f * %s) + %f)", track, a, fillInScoreColumn, b); int changed = sqlUpdateRows(conn, query, NULL); verbose(2, "update query: %s; changed: %d\n", query, changed); } else { sqlFreeResult(&sr); verbose(2, "score not updated; all values for column %s are -1\n", fillInScoreColumn); } } } } } } sqlDisconnect(&conn); /* if temp dir specified, unlink file to make it disappear */ if ((char *)NULL != tmpDir) unlink(tab); } else verbose(1, "No load option selected, see file: %s\n", tab); } /* static void loadDatabase() */
static void asdDoQueryChunking(struct annoStreamDb *self, char *minChrom, uint minEnd)
/* Build and run the SQL query for the next chunk of table items and hand the
 * rows to bufferRowsFromSqlQuery (nothing is returned).
 * For a region query (streamer chrom set) the query is restricted to the
 * region; otherwise the genome is walked one chromosome at a time via
 * self->queryChrom.  minChrom/minEnd, when given, let the query skip items
 * before that position.  Sets self->eof when there is nothing (more) to
 * fetch; at most ASD_CHUNK_SIZE rows (or the remaining maxOutRows) are
 * requested per call. */
{
struct annoStreamer *sSelf = &(self->streamer);
boolean hasWhere = FALSE;
struct dyString *query = self->makeBaselineQuery(self, &hasWhere);
if (sSelf->chrom != NULL && self->rowBuf.size > 0 && !self->doNextChunk)
    {
    /* We're doing a region query, we already got some rows, and don't need another chunk: */
    resetRowBuf(&self->rowBuf);
    self->eof = TRUE;
    }
if (self->useMaxOutRows)
    {
    self->maxOutRows -= self->rowBuf.size;
    if (self->maxOutRows <= 0)
        self->eof = TRUE;
    }
if (self->eof)
    {
    /* The baseline query was already allocated above; release it. */
    dyStringFree(&query);
    return;
    }
int queryMaxItems = ASD_CHUNK_SIZE;
if (self->useMaxOutRows && self->maxOutRows < queryMaxItems)
    queryMaxItems = self->maxOutRows;
if (self->hasBin)
    {
    /* Results will be in bin order, but we can restore chromStart order by
     * accumulating initial coarse-bin items and merge-sorting them with
     * subsequent finest-bin items which will be in chromStart order. */
    if (self->doNextChunk && self->mergeBins && !self->gotFinestBin)
        errAbort("annoStreamDb %s: can't continue merge in chunking query; "
                 "increase ASD_CHUNK_SIZE", sSelf->name);
    self->mergeBins = TRUE;
    if (self->qLm == NULL)
        self->qLm = lmInit(0);
    }
if (self->endFieldIndexName != NULL)
    {
    /* Don't let mysql use a (chrom, chromEnd) index because that messes up
     * sorting by chromStart. */
    sqlDyStringPrintf(query, " IGNORE INDEX (%s) ", self->endFieldIndexName);
    }
if (sSelf->chrom != NULL)
    {
    uint start = sSelf->regionStart;
    if (minChrom)
        {
        if (differentString(minChrom, sSelf->chrom))
            errAbort("annoStreamDb %s: nextRow minChrom='%s' but region chrom='%s'",
                     sSelf->name, minChrom, sSelf->chrom);
        if (start < minEnd)
            start = minEnd;
        }
    if (self->doNextChunk && start < self->nextChunkStart)
        start = self->nextChunkStart;
    sqlDyStringAppend(query, hasWhere ? " and " : " where ");
    sqlDyStringPrintf(query, "%s = '%s' and ", self->chromField, sSelf->chrom);
    if (self->hasBin)
        {
        if (self->doNextChunk && self->gotFinestBin)
            /* It would be way more elegant to make a hAddBinTopLevelOnly but this will do: */
            dyStringPrintf(query, "bin > %d and ", self->minFinestBin);
        hAddBinToQuery(start, sSelf->regionEnd, query);
        }
    if (self->doNextChunk)
        sqlDyStringPrintf(query, "%s >= %u and ", self->startField, self->nextChunkStart);
    sqlDyStringPrintf(query, "%s < %u and %s > %u ", self->startField, sSelf->regionEnd,
                      self->endField, start);
    if (self->notSorted)
        sqlDyStringPrintf(query, "order by %s ", self->startField);
    sqlDyStringPrintf(query, "limit %d", queryMaxItems);
    bufferRowsFromSqlQuery(self, query->string, queryMaxItems);
    if (self->rowBuf.size == 0)
        self->eof = TRUE;
    }
else
    {
    /* Genome-wide query: break it into chrom-by-chrom queries. */
    if (self->queryChrom == NULL)
        self->queryChrom = self->chromList;
    else if (!self->doNextChunk)
        {
        self->queryChrom = self->queryChrom->next;
        resetMergeState(self);
        }
    if (minChrom != NULL)
        {
        /* Skip chroms that precede minChrom */
        while (self->queryChrom != NULL && strcmp(self->queryChrom->name, minChrom) < 0)
            {
            self->queryChrom = self->queryChrom->next;
            self->doNextChunk = FALSE;
            resetMergeState(self);
            }
        if (self->hasBin)
            {
            self->mergeBins = TRUE;
            if (self->qLm == NULL)
                self->qLm = lmInit(0);
            }
        }
    if (self->queryChrom == NULL)
        self->eof = TRUE;
    else
        {
        char *chrom = self->queryChrom->name;
        /* Unsigned like the region branch: it is assigned from uint minEnd
         * and printed with %u below. */
        uint start = 0;
        if (minChrom != NULL && sameString(chrom, minChrom))
            start = minEnd;
        if (self->doNextChunk && start < self->nextChunkStart)
            start = self->nextChunkStart;
        uint end = annoAssemblySeqSize(self->streamer.assembly, self->queryChrom->name);
        sqlDyStringAppend(query, hasWhere ? " and " : " where ");
        sqlDyStringPrintf(query, "%s = '%s' ", self->chromField, chrom);
        if (start > 0 || self->doNextChunk)
            {
            dyStringAppend(query, "and ");
            if (self->hasBin)
                {
                if (self->doNextChunk && self->gotFinestBin)
                    /* It would be way more elegant to make a hAddBinTopLevelOnly but this will do: */
                    dyStringPrintf(query, "bin > %d and ", self->minFinestBin);
                hAddBinToQuery(start, end, query);
                }
            if (self->doNextChunk)
                sqlDyStringPrintf(query, "%s >= %u and ", self->startField, self->nextChunkStart);
            /* region end is chromSize, so no need to constrain startField here: */
            sqlDyStringPrintf(query, "%s > %u ", self->endField, start);
            }
        if (self->notSorted)
            sqlDyStringPrintf(query, "order by %s ", self->startField);
        dyStringPrintf(query, "limit %d", queryMaxItems);
        bufferRowsFromSqlQuery(self, query->string, queryMaxItems);
        /* If there happens to be no items on chrom, try again with the next chrom: */
        if (! self->eof && self->rowBuf.size == 0)
            asdDoQueryChunking(self, minChrom, minEnd);
        }
    }
dyStringFree(&query);
}
int main(int argc, char *argv[]) /* Process command line. */ { char *chromInfo; optionInit(&argc, argv, optionSpecs); if (argc < 4) usage(); noBin = optionExists("noBin") || optionExists("nobin"); noSort = optionExists("noSort"); strictTab = optionExists("tab"); oldTable = optionExists("oldTable"); sqlTable = optionVal("sqlTable", sqlTable); renameSqlTable = optionExists("renameSqlTable"); trimSqlTable = optionExists("trimSqlTable"); as = optionVal("as", as); type = optionVal("type", type); hasBin = optionExists("hasBin"); noLoad = optionExists("noLoad"); noHistory = optionExists("noHistory"); bedGraph = optionInt("bedGraph",0); bedDetail = optionExists("bedDetail"); minScore = optionInt("minScore",100); if (minScore<0 || minScore>1000) errAbort("minScore must be between 0-1000\n"); notItemRgb = optionExists("notItemRgb"); if (notItemRgb) itemRgb = FALSE; maxChromNameLength = optionInt("maxChromNameLength",0); dotIsNull = optionInt("dotIsNull",dotIsNull); noStrict = optionExists("noStrict") || optionExists("nostrict"); allowStartEqualEnd = optionExists("allowStartEqualEnd"); tmpDir = optionVal("tmpDir", tmpDir); nameIx = ! 
optionExists("noNameIx"); ignoreEmpty = optionExists("ignoreEmpty"); allowNegativeScores = optionExists("allowNegativeScores"); customTrackLoader = optionExists("customTrackLoader"); parseType(); /* turns on: noNameIx, ignoreEmpty, allowStartEqualEnd, allowNegativeScores * -verbose=0 */ if (customTrackLoader) { type = NULL; /* because customTrack/Factory has already validated the input */ ignoreEmpty = TRUE; noHistory = TRUE; nameIx = FALSE; allowStartEqualEnd = TRUE; allowNegativeScores = TRUE; verboseSetLevel(0); expireSeconds = 1200; /* 20 minutes */ (void) signal(SIGALRM, selfApoptosis); (void) alarm(expireSeconds); /* CGI timeout */ } fillInScoreColumn = optionVal("fillInScore", NULL); chromInfo=optionVal("chromInfo", NULL); if (chromInfo) { if (!type) errAbort("Only use chromInfo with type for validate"); // Get chromInfo from file chrHash = chromHashFromFile(chromInfo); } else if (type) { // Get chromInfo from DB chrHash = chromHashFromDatabase(argv[1]); } hgLoadBed(argv[1], argv[2], argc-3, argv+3); return 0; }
struct tagStorm *idfToStormTop(char *fileName) /* Convert an idf.txt format file to a tagStorm with a single top-level stanza */ { /* Create a tag storm with one as yet empty stanza */ struct tagStorm *storm = tagStormNew(fileName); struct tagStanza *stanza = tagStanzaNew(storm, NULL); /* Some stuff to help turn File_Data1, File_Data2, etc to a comma separated list */ char *additionalFilePrefix = "idf.Comment_AdditionalFile_Data"; struct dyString *additionalFileDy = dyStringNew(0); /* There can be multiple secondary accession tags, so handle these too */ char *secondaryAccessionTag = "idf.Comment_SecondaryAccession"; struct dyString *secondaryAccessionDy = dyStringNew(0); /* Parse lines from idf file into stanza */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; struct dyString *dyVal = dyStringNew(0); while (lineFileNextReal(lf, &line)) { /* Erase trailing tab... */ eraseTrailingSpaces(line); /* Parse line into tab-separated array and make sure it's a reasonable size */ char *row[256]; int rowSize = chopTabs(line, row); if (rowSize == ArraySize(row)) errAbort("Line %d of %s has too many fields", lf->lineIx, lf->fileName); if (rowSize < 2) continue; /* Convert first element to tagName */ char tagName[256]; aeFieldToNormalField("idf.", trimSpaces(row[0]), tagName, sizeof(tagName)); /* Special case where we already are a comma separated list */ if (sameString(tagName, "idf.Publication_Author_List")) { tagStanzaAppend(storm, stanza, tagName, row[1]); } else if (startsWith(additionalFilePrefix, tagName)) { csvEscapeAndAppend(additionalFileDy, row[1]); } else if (sameString(secondaryAccessionTag, tagName)) { csvEscapeAndAppend(secondaryAccessionDy, row[1]); } else { /* Convert rest of elements to possibly comma separated values */ dyStringClear(dyVal); int i; for (i=1; i<rowSize; ++i) csvEscapeAndAppend(dyVal, row[i]); tagStanzaAppend(storm, stanza, tagName, dyVal->string); } } if (additionalFileDy->stringSize != 0) tagStanzaAppend(storm, stanza, 
additionalFilePrefix, additionalFileDy->string); if (secondaryAccessionDy->stringSize != 0) tagStanzaAppend(storm, stanza, secondaryAccessionTag, secondaryAccessionDy->string); dyStringFree(&secondaryAccessionDy); dyStringFree(&additionalFileDy); dyStringFree(&dyVal); lineFileClose(&lf); return storm; }
void badFormat(struct lineFile *lf)
/* Abort with a message naming the current line number and file name of lf. */
{
int lineNumber = lf->lineIx;
char *sourceName = lf->fileName;

errAbort("Bad format line %d of %s", lineNumber, sourceName);
}
struct nameOff *scanIntronFile(char *preIntronQ, char *startIntronQ, char *endIntronQ, char *postIntronQ, boolean invert)
/* Scan introns.txt in the directory returned by wormCdnaDir() and return a
 * list, sorted with cmpCounts, of the introns matching the four query
 * patterns: sequence just before the intron, at its start, at its end and
 * just after it.  An empty pattern matches everything.  When invert is set
 * the introns that do NOT match are returned instead.  Every selected intron
 * is also passed to addIntronToHistogram.
 * Aborts if a query is too long (25 bases outside / 40 inside the intron),
 * if the file path does not fit the buffer, or on a short line. */
{
char intronFileName[600];
int nameLen;
FILE *f;
char lineBuf[4*1024];
char *words[4*128];
int wordCount;
int lineCount = 0;
int preLenQ = strlen(preIntronQ);
int startLenQ = strlen(startIntronQ);
int endLenQ = strlen(endIntronQ);
int postLenQ = strlen(postIntronQ);
char *preIntronF, *startIntronF, *endIntronF, *postIntronF;
int preLenF, endLenF;
int preIx = 6, startIx = 7, endIx =8, postIx = 9;
struct nameOff *list = NULL, *el;
boolean addIt;
int i;

if (preLenQ > 25 || postLenQ > 25 || startLenQ > 40 || endLenQ > 40)
    {
    errAbort("Can only handle queries up to 25 bases on either side of the intron "
             "and 40 bases inside the intron.");
    }

/* Bounded formatting: the directory comes from configuration and could be
 * longer than the fixed buffer. */
nameLen = snprintf(intronFileName, sizeof(intronFileName), "%s%s", wormCdnaDir(), "introns.txt");
if (nameLen < 0 || nameLen >= (int)sizeof(intronFileName))
    errAbort("Path to introns.txt is too long (limit %d characters)",
             (int)sizeof(intronFileName) - 1);
f = mustOpen(intronFileName, "r");

while (fgets(lineBuf, sizeof(lineBuf), f) != NULL)
    {
    ++lineCount;
    wordCount = chopByWhite(lineBuf, words, ArraySize(words));
    if (wordCount == ArraySize(words))
        {
        warn("May have truncated end of line %d of %s",
             lineCount, intronFileName);
        }
    if (wordCount == 0)
        continue;
    if (wordCount < 11)
        errAbort("Unexpected short line %d of %s", lineCount, intronFileName);

    /* Columns 6..9 hold the sequence before, at the start of, at the end of
     * and after the intron. */
    preIntronF = words[preIx];
    startIntronF = words[startIx];
    endIntronF = words[endIx];
    postIntronF = words[postIx];
    preLenF = strlen(preIntronF);
    endLenF = strlen(endIntronF);

    /* The pre and end patterns are matched against the tail of their
     * column, the start and post patterns against the head. */
    addIt = FALSE;
    if (  ( preLenQ == 0 || patMatch(preIntronQ, preIntronF+preLenF-preLenQ+countSpecial(preIntronQ), preLenQ))
       && (startLenQ == 0 || patMatch(startIntronQ, startIntronF, startLenQ))
       && (  endLenQ == 0 || patMatch(endIntronQ, endIntronF+endLenF-endLenQ+countSpecial(endIntronQ), endLenQ))
       && ( postLenQ == 0 || patMatch(postIntronQ, postIntronF, postLenQ)) )
        {
        addIt = TRUE;
        }
    if (invert)
        addIt = !addIt;
    if (addIt)
        {
        addIntronToHistogram(preIntronF+preLenF, startIntronF, endIntronF+endLenF, postIntronF);
        AllocVar(el);
        el->chrom = cloneString(words[1]);
        el->name = cloneString(words[5]);
        el->start = atoi(words[2]);
        el->end = atoi(words[3]);
        el->cdnaCount = atoi(words[0]);
        /* Keep the first two and the last two bases of the intron. */
        memcpy(el->startI, startIntronF, 2);
        memcpy(el->endI, endIntronF + endLenF - 2, 2);
        /* Words from index 10 on are the supporting cDNA names. */
        assert(wordCount == el->cdnaCount + 10);
        for (i=10; i<wordCount; ++i)
            {
            struct slName *name = newSlName(words[i]);
            slAddHead(&el->cdnaNames, name);
            }
        slReverse(&el->cdnaNames);
        assert(slCount(el->cdnaNames) == el->cdnaCount);
        slAddHead(&list, el);
        }
    }
fclose(f);
slSort(&list, cmpCounts);
return list;
}
struct hash *makeProbeBed(char *inGff, char *outBed) /* Convert probe location GFF file to BED. */ { struct lineFile *lf = lineFileOpen(inGff, TRUE); char *row[9]; struct hash *hash = newHash(16); FILE *f = mustOpen(outBed, "w"); while (lineFileNextRowTab(lf, row, ArraySize(row))) { int chromIx = romanToArabicChrom(row[0], lf); int start = lineFileNeedNum(lf, row, 3) - 1; int end = lineFileNeedNum(lf, row, 4); char *s = row[8]; char *probe, *orf, *note; char *boundAt = "Bound at "; struct tfBinding *tfbList = NULL, *tfb; if (!startsWith("Probe ", s)) errAbort("Expecting 9th column to start with 'Probe ' line %d of %s", lf->lineIx, lf->fileName); probe = nextWord(&s); orf = nextWord(&s); chopOff(orf, ';'); note = nextWord(&s); if (!sameWord("Note", note)) errAbort("Expecting 'note' in 9th column line %d of %s", lf->lineIx, lf->fileName); s = skipLeadingSpaces(s); if (!parseQuotedString(s, s, NULL)) errAbort("Expecting quoted string in 9th column line %d of %s", lf->lineIx, lf->fileName); if (startsWith("Bad Probe", s)) continue; else if (startsWith("Not bound", s)) { /* Ok, we do nothing. 
*/ } else if (startsWith(boundAt, s)) { while (s != NULL && startsWith(boundAt, s)) { char *word, *by; double binding; s += strlen(boundAt); word = nextWord(&s); binding = atof(word); by = nextWord(&s); if (!sameString("by:", by)) errAbort("Expecting by: line %d of %s", lf->lineIx, lf->fileName); while ((word = nextWord(&s)) != NULL) { char lastChar = 0, *e; e = word + strlen(word) - 1; lastChar = *e; if (lastChar == ';' || lastChar == ',') *e = 0; AllocVar(tfb); tfb->binding = binding; tfb->tf = cloneString(word); slAddHead(&tfbList, tfb); if (lastChar == ';') break; } s = skipLeadingSpaces(s); } slReverse(&tfbList); } else { errAbort("Expecting %s in note line %d of %s", boundAt, lf->lineIx, lf->fileName); } fprintf(f, "chr%d\t%d\t%d\t", chromIx+1, start, end); fprintf(f, "%s\t%d\t", orf, slCount(tfbList)); for (tfb = tfbList; tfb != NULL; tfb = tfb->next) fprintf(f, "%s,", tfb->tf); fprintf(f, "\t"); for (tfb = tfbList; tfb != NULL; tfb = tfb->next) fprintf(f, "%4.3f,", tfb->binding); fprintf(f, "\n"); hashAdd(hash, orf, NULL); } lineFileClose(&lf); carefulClose(&f); return hash; }
void dbTrash(char *db) /* dbTrash - drop tables from a database older than specified N hours. */ { char query[256]; struct sqlResult *sr; char **row; int updateTimeIx; int createTimeIx; int dataLengthIx; int indexLengthIx; int nameIx; int timeIxUsed; unsigned long long totalSize = 0; // expiredTableNames: table exists and is in metaInfo and subject to age limits struct slName *expiredTableNames = NULL; struct slName *lostTables = NULL; // tables existing but not in metaInfo unsigned long long lostTableCount = 0; struct hash *expiredHash = newHash(10); // as determined by metaInfo struct hash *notExpiredHash = newHash(10); struct sqlConnection *conn = sqlConnect(db); if (extFileCheck) checkExtFile(conn); time_t ageSeconds = (time_t)(ageHours * 3600); /* age in seconds */ sqlSafef(query,sizeof(query),"select name,UNIX_TIMESTAMP(lastUse) from %s WHERE " "lastUse < DATE_SUB(NOW(), INTERVAL %ld SECOND);", CT_META_INFO,ageSeconds); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) hashAddInt(expiredHash, row[0], sqlSigned(row[1])); sqlFreeResult(&sr); sqlSafef(query,sizeof(query),"select name,UNIX_TIMESTAMP(lastUse) from %s WHERE " "lastUse >= DATE_SUB(NOW(), INTERVAL %ld SECOND);",CT_META_INFO,ageSeconds); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) hashAddInt(notExpiredHash, row[0], sqlSigned(row[1])); sqlFreeResult(&sr); if (tableStatus) // show table status is very expensive, use only when asked { /* run through the table status business to get table size information */ sqlSafef(query,sizeof(query),"show table status"); STATUS_INIT; while ((row = sqlNextRow(sr)) != NULL) { /* if not doing history too, and this is the history table, next row */ if ((!historyToo) && (sameWord(row[nameIx],"history"))) continue; /* also skip the metaInfo table */ if ((!historyToo) && (sameWord(row[nameIx],CT_META_INFO))) continue; /* don't delete the extFile table */ if (sameWord(row[nameIx],CT_EXTFILE)) continue; SCAN_STATUS; if 
(hashLookup(expiredHash,row[nameIx])) { slNameAddHead(&expiredTableNames, row[nameIx]); verbose(3,"%s %ld drop %s\n",row[timeIxUsed], (unsigned long)timep, row[nameIx]); /* If sizes are non-NULL, add them up */ if ( ((char *)NULL != row[dataLengthIx]) && ((char *)NULL != row[indexLengthIx]) ) totalSize += sqlLongLong(row[dataLengthIx]) + sqlLongLong(row[indexLengthIx]); hashRemove(expiredHash, row[nameIx]); } else { if (hashLookup(notExpiredHash,row[nameIx])) verbose(3,"%s %ld OK %s\n",row[timeIxUsed], (unsigned long)timep, row[nameIx]); else { /* table exists, but not in metaInfo, is it old enough ? */ if (timep < dropTime) { slNameAddHead(&expiredTableNames, row[nameIx]); verbose(2,"%s %ld dropt %s lost table\n", row[timeIxUsed], (unsigned long)timep, row[nameIx]); /* If sizes are non-NULL, add them up */ if ( ((char *)NULL != row[dataLengthIx]) && ((char *)NULL != row[indexLengthIx]) ) totalSize += sqlLongLong(row[dataLengthIx]) + sqlLongLong(row[indexLengthIx]); } else verbose(3,"%s %ld OKt %s\n",row[timeIxUsed], (unsigned long)timep, row[nameIx]); } } } sqlFreeResult(&sr); } else { // simple 'show tables' is more efficient than 'show table status' sqlSafef(query,sizeof(query),"show tables"); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { if (hashLookup(expiredHash,row[0])) { slNameAddHead(&expiredTableNames, row[0]); time_t lastUse = (time_t)hashIntVal(expiredHash,row[0]); struct tm *lastUseTm = localtime(&lastUse); verbose(3,"%4d-%02d-%02d %02d:%02d:%02d %ld drop %s\n", lastUseTm->tm_year+1900, lastUseTm->tm_mon+1, lastUseTm->tm_mday, lastUseTm->tm_hour, lastUseTm->tm_min, lastUseTm->tm_sec, (unsigned long)lastUse,row[0]); hashRemove(expiredHash, row[0]); } else if (hashLookup(notExpiredHash,row[0])) { time_t lastUse = (time_t)hashIntVal(notExpiredHash,row[0]); struct tm *lastUseTm = localtime(&lastUse); verbose(3,"%4d-%02d-%02d %02d:%02d:%02d %ld OK %s\n", lastUseTm->tm_year+1900, lastUseTm->tm_mon+1, lastUseTm->tm_mday, 
lastUseTm->tm_hour, lastUseTm->tm_min, lastUseTm->tm_sec, (unsigned long)lastUse,row[0]); } else { struct slName *el = slNameNew(row[0]); slAddHead(&lostTables, el); } } sqlFreeResult(&sr); lostTableCount = slCount(lostTables); // If tables exist, but not in metaInfo, check their age to expire them. // It turns out even this show table status is slow too, so, only // run thru it if asked to eliminate lost tables. It is better to // do this operation with the stand-alone perl script on the customTrash // database machine. if (delLostTable && lostTables) { struct slName *el; for (el = lostTables; el != NULL; el = el->next) { if (sameWord(el->name,"history")) continue; if (sameWord(el->name,CT_META_INFO)) continue; if (sameWord(el->name,CT_EXTFILE)) continue; boolean oneTableOnly = FALSE; // protect against multiple tables /* get table time information to see if it is expired */ sqlSafef(query,sizeof(query),"show table status like '%s'", el->name); STATUS_INIT; while ((row = sqlNextRow(sr)) != NULL) { if (oneTableOnly) errAbort("ERROR: query: '%s' returned more than one table " "name\n", query); else oneTableOnly = TRUE; if (differentWord(row[nameIx], el->name)) errAbort("ERROR: query: '%s' did not return table name '%s' != '%s'\n", query, el->name, row[nameIx]); SCAN_STATUS; if (timep < dropTime) { slNameAddHead(&expiredTableNames, row[nameIx]); verbose(2,"%s %ld dropt %s lost table\n", row[timeIxUsed], (unsigned long)timep, row[nameIx]); } else verbose(3,"%s %ld OKt %s\n", row[timeIxUsed], (unsigned long)timep, row[nameIx]); } sqlFreeResult(&sr); } } } /* perhaps the table was already dropped, but not from the metaInfo */ struct hashEl *elList = hashElListHash(expiredHash); struct hashEl *el; for (el = elList; el != NULL; el = el->next) { verbose(2,"%s exists in %s only\n", el->name, CT_META_INFO); if (drop) ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */ } if (drop) { char comment[256]; if (expiredTableNames) { struct slName *el; int droppedCount 
= 0; /* customTrash DB user permissions do not have permissions to * drop tables. Must use standard special user that has all * permissions. If we are not using the standard user at this * point, then switch to it. */ if (sameWord(db,CUSTOM_TRASH)) { sqlDisconnect(&conn); conn = sqlConnect(db); } for (el = expiredTableNames; el != NULL; el = el->next) { verbose(2,"# drop %s\n", el->name); sqlDropTable(conn, el->name); ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */ ++droppedCount; } /* add a comment to the history table and finish up connection */ if (tableStatus) safef(comment, sizeof(comment), "Dropped %d tables with " "total size %llu, %llu lost tables", droppedCount, totalSize, lostTableCount); else safef(comment, sizeof(comment), "Dropped %d tables, no size info, %llu lost tables", droppedCount, lostTableCount); verbose(2,"# %s\n", comment); hgHistoryComment(conn, "%s", comment); } else { safef(comment, sizeof(comment), "Dropped no tables, none expired, %llu lost tables", lostTableCount); verbose(2,"# %s\n", comment); } } else { char comment[256]; if (expiredTableNames) { int droppedCount = slCount(expiredTableNames); if (tableStatus) safef(comment, sizeof(comment), "Would have dropped %d tables with " "total size %llu, %llu lost tables", droppedCount, totalSize, lostTableCount); else safef(comment, sizeof(comment), "Would have dropped %d tables, no size info, %llu lost tables", droppedCount, lostTableCount); verbose(2,"# %s\n", comment); } else { safef(comment, sizeof(comment), "Would have dropped no tables, none expired, %llu lost tables", lostTableCount); verbose(2,"# %s\n", comment); } } sqlDisconnect(&conn); }
struct hash *makeMotifBed(char *gffDir, char *outBed) /* Make bed file from GFFs. Return hash of transcription factors. */ { static char *consLevelPath[3] = {"3", "2", "0"}; static char *consLevelBed[3] = {"2", "1", "0"}; static char *pLevelPath[3] = {"p001b", "p005b", "nobind"}; static char *pLevelBed[3] = {"good", "weak", "none"}; int cIx, pIx; FILE *f = mustOpen(outBed, "w"); struct hash *tfHash = newHash(0); struct hash *yrcHash = newHash(18); struct yrc *yrcList = NULL, *yrc; for (cIx=0; cIx<3; ++cIx) { for (pIx=0; pIx<3; ++pIx) { struct lineFile *lf; char *row[10]; char fileName[PATH_LEN]; char hashKey[256]; safef(fileName, sizeof(fileName), "%s/IGR_v24.%s.%s.GFF", gffDir, consLevelPath[cIx], pLevelPath[pIx]); lf = lineFileOpen(fileName, TRUE); while (lineFileRow(lf, row)) { char *name = row[9]; char *e; int chromIx, chromStart, chromEnd; if (!sameWord(row[8], "Site")) errAbort("Expecting 'Site' line %d of %s", lf->lineIx, lf->fileName); e = strchr(name, ';'); if (e == NULL) errAbort("Expecting semicolon line %d of %s", lf->lineIx, lf->fileName); *e = 0; chromIx = romanToArabicChrom(row[0], lf); chromStart = lineFileNeedNum(lf, row, 3); chromEnd = lineFileNeedNum(lf, row, 4); safef(hashKey, sizeof(hashKey), "%s.%d.%d", name, chromIx, chromStart); if ((yrc = hashFindVal(yrcHash, hashKey)) == NULL) { AllocVar(yrc); yrc->chromIx= chromIx; yrc->chromStart = chromStart; yrc->chromEnd = chromEnd; yrc->name = hashStoreName(tfHash, name); yrc->pLevel = pIx; yrc->consLevel = cIx; hashAdd(yrcHash, hashKey, yrc); slAddHead(&yrcList, yrc); } else { if (pIx < yrc->pLevel) yrc->pLevel = pIx; if (cIx < yrc->consLevel) yrc->consLevel = cIx; } } lineFileClose(&lf); } } for (yrc = yrcList; yrc != NULL; yrc = yrc->next) { fprintf(f, "chr%d\t", yrc->chromIx+1); fprintf(f, "%d\t", yrc->chromStart); fprintf(f, "%d\t", yrc->chromEnd); fprintf(f, "%s\t", yrc->name); fprintf(f, "%d\t", (int)(1000/(yrc->pLevel + yrc->consLevel + 1))); fprintf(f, "%s\t", pLevelBed[yrc->pLevel]); 
fprintf(f, "%s\n", consLevelBed[yrc->consLevel]); } carefulClose(&f); hashFree(&yrcHash); return tfHash; }
void testOutSequence(struct htmlPage *tablePage, struct htmlForm *mainForm, char *org, char *db, char *group, char *track, char *table, int expectedRows) /* Get as sequence and make sure count agrees with expected. */ /* mainForm not used */ { struct htmlPage *outPage; int attempts = 0; struct htmlFormVar *typeVar; if (tablePage->forms == NULL) errAbort("testOutSequence: Missing form (tablePage)"); htmlPageSetVar(tablePage, NULL, hgtaOutputType, "sequence"); outPage = quickSubmit(tablePage, org, db, group, track, table, "seqUi1", hgtaDoTopSubmit, "submit"); while (outPage == NULL && attempts < MAX_ATTEMPTS) { printf("testOutSequence: trying again to get seqUi1\n"); outPage = quickSubmit(tablePage, org, db, group, track, table, "seqUi1", hgtaDoTopSubmit, "submit"); attempts++; } if (outPage == NULL) { qaStatusSoftError(tablesTestList->status, "Error in testOutSequence - couldn't get outPage"); return; } if (outPage->forms == NULL) { qaStatusSoftError(tablesTestList->status, "Error in testOutSequence - missing form"); htmlPageFree(&outPage); return; } /* Since some genomic sequence things are huge, this will * only test in case where it's a gene prediction. 
*/ typeVar = htmlFormVarGet(outPage->forms, hgtaGeneSeqType); if (typeVar != NULL) { struct htmlPage *seqPage; static char *types[] = {"protein", "mRNA"}; int i; for (i=0; i<ArraySize(types); ++i) { char *type = types[i]; if (slNameInList(typeVar->values, type)) { struct htmlPage *page; char testName[128]; htmlPageSetVar(outPage, NULL, hgtaGeneSeqType, type); safef(testName, sizeof(testName), "%sSeq", type); page = quickSubmit(outPage, org, db, group, track, table, testName, hgtaDoGenePredSequence, "submit"); checkFaOutput(page, expectedRows, TRUE); htmlPageFree(&page); } } htmlPageSetVar(outPage, NULL, hgtaGeneSeqType, "genomic"); serialSubmit(&outPage, org, db, group, track, table, "seqUi2", hgtaDoGenePredSequence, "submit"); // check that outPage != NULL /* On genomic page uncheck intron if it's there, then get results * and count them. */ if (htmlFormVarGet(outPage->forms, "hgSeq.intron") != NULL) htmlPageSetVar(outPage, NULL, "hgSeq.intron", NULL); seqPage = quickSubmit(outPage, org, db, group, track, table, "genomicSeq", hgtaDoGenomicDna, "submit"); // check that seqPage != NULL checkFaOutput(seqPage, expectedRows, FALSE); htmlPageFree(&seqPage); } htmlPageFree(&outPage); }
static void doPslMapAli(struct sqlConnection *conn, int taxon, char *db, int fromTaxon, char *fromDb) { char cmd[256]; struct dyString *dy = dyStringNew(0); char path[256]; char dnaPath[256]; char toDb[12]; safef(toDb,sizeof(toDb),"%s", db); toDb[0]=toupper(toDb[0]); safef(dnaPath,sizeof(dnaPath),"/cluster/data/%s/nib", db); if (!fileExists(dnaPath)) { safef(dnaPath,sizeof(dnaPath),"/cluster/data/%s/%s.2bit", db, db); if (!fileExists(dnaPath)) errAbort("unable to locate nib dir or .2bit for %s: %s", db, dnaPath); } safef(path,sizeof(path),"/gbdb/%s/liftOver/%sTo%s.over.chain.gz", fromDb, fromDb, toDb); if (!fileExists(path)) errAbort("unable to locate chain file %s",path); /* get non-bac $db.vgProbes not yet aligned */ getPslMapAli(conn, db, fromTaxon, fromDb, FALSE); /* get bac $db.vgProbes not yet aligned */ getPslMapAli(conn, db, fromTaxon, fromDb, TRUE); /* get .fa for pslRecalcMatch use */ getPslMapFa(conn, db, fromTaxon); /* non-bac */ safef(cmd,sizeof(cmd), "zcat %s | pslMap -chainMapFile -swapMap nonBac.psl stdin stdout " "| sort -k 14,14 -k 16,16n > unscoredNB.psl" ,path); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd), "pslRecalcMatch unscoredNB.psl %s" " pslMap.fa nonBac.psl" ,dnaPath); verbose(1,"%s\n",cmd); system(cmd); /* bac */ safef(cmd,sizeof(cmd), "zcat %s | pslMap -chainMapFile -swapMap bac.psl stdin stdout " "| sort -k 14,14 -k 16,16n > unscoredB.psl" ,path); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd), "pslRecalcMatch unscoredB.psl %s" " pslMap.fa bacTemp.psl" ,dnaPath); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd), "pslCDnaFilter -globalNearBest=0.00001 -minCover=0.05" " bacTemp.psl bac.psl"); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd),"cat bac.psl nonBac.psl > vgPrbPslMap.psl"); verbose(1,"%s\n",cmd); system(cmd); dyStringFree(&dy); }
int splatCheck1(char *inFa, char *inSplat, char *outMiss, char *outWrong) /* splatCheck1 - Check that all the test set really is being covered.. */ { struct lineFile *lf = lineFileOpen(inSplat, TRUE); FILE *missF = mustOpen(outMiss, "w"); FILE *badF = mustOpen(outWrong, "w"); char *row[7]; struct hash *allHash = hashFaNames(inFa); struct hash *mappedHash = hashNew(0); /* Keep track of reads we've seen here. */ struct hash *goodHash = hashNew(0); /* Keep track of good reads here. */ while (lineFileRow(lf, row)) { /* Read in line and parse it, track it. */ int chromStart = sqlUnsigned(row[1]); char *strand = row[5]; char *name = row[6]; hashStore(mappedHash, name); /* Parse out name field to figure out where we expect it to map. */ char *pt = name; char *expectStrand = "+"; if (startsWith("RC_", pt)) { pt += 3; expectStrand = "-"; } char *nameCopy = cloneString(pt); char *parts[4]; int partCount = chopByChar(nameCopy, '_', parts, ArraySize(parts)); if (partCount != 3) errAbort("Can't parse name field line %d of %s", lf->lineIx, lf->fileName); int expectStart = sqlUnsigned(parts[1]); if (sameString(strand, expectStrand)) { int diff = intAbs(chromStart - expectStart); if (diff <= 2) { hashStore(goodHash, name); } } freeMem(nameCopy); } struct hashEl *hel, *helList = hashElListHash(allHash); int allCount = allHash->elCount; int missCount = 0, badCount = 0; for (hel = helList; hel != NULL; hel = hel->next) { char *name = hel->name; if (!hashLookup(mappedHash, name)) { fprintf(missF, "%s\n", name); ++missCount; } else { if (!hashLookup(goodHash, hel->name)) { fprintf(badF, "%s\n", hel->name); ++badCount; } } } carefulClose(&badF); carefulClose(&missF); verbose(1, "Total reads %d\n", allCount); verbose(1, "Unmapped %d (%5.2f%%)\n", missCount, 100.0*missCount/allCount); verbose(1, "Mapped wrong %d (%5.2f%%)\n", badCount, 100.0*badCount/allCount); return -(missCount + badCount); }
void pslSort2(char *outFile, char *tempDir) /* Do second step of sort - merge all sorted files in tempDir * to final. */ { char fileName[512]; struct slName *tmpList, *tmp; struct midFile *midList = NULL, *mid; int aliCount = 0; FILE *f = mustOpen(outFile, "w"); if (!nohead) pslWriteHead(f); tmpList = listDir(tempDir, "tmp*.psl"); if (tmpList == NULL) errAbort("No tmp*.psl files in %s\n", tempDir); for (tmp = tmpList; tmp != NULL; tmp = tmp->next) { sprintf(fileName, "%s/%s", tempDir, tmp->name); AllocVar(mid); mid->lf = pslFileOpen(fileName); slAddHead(&midList, mid); } verbose(1, "writing %s", outFile); fflush(stdout); /* Write out the lowest sorting line from mid list until done. */ for (;;) { struct midFile *bestMid = NULL; if ( (++aliCount & 0xffff) == 0) { verboseDot(); fflush(stdout); } for (mid = midList; mid != NULL; mid = mid->next) { if (mid->lf != NULL && mid->psl == NULL) { if ((mid->psl = nextPsl(mid->lf)) == NULL) lineFileClose(&mid->lf); } if (mid->psl != NULL) { if (bestMid == NULL || pslCmpQuery(&mid->psl, &bestMid->psl) < 0) bestMid = mid; } } if (bestMid == NULL) break; pslTabOut(bestMid->psl, f); pslFree(&bestMid->psl); } printf("\n"); fclose(f); /* The followint really shouldn't be necessary.... */ for (mid = midList; mid != NULL; mid = mid->next) lineFileClose(&mid->lf); printf("Cleaning up temp files\n"); for (tmp = tmpList; tmp != NULL; tmp = tmp->next) { sprintf(fileName, "%s/%s", tempDir, tmp->name); remove(fileName); } }