Exemplo n.º 1
0
void pslSort(char *command, char *outFile, char *tempDir, char *inDirs[], int inDirCount)
/* Do the two step sort.
 *   Step 1 (skipped when command ends in "2"): collect the *.psl files (or, if a
 *   directory has none, its *.psl.gz files) from each of the inDirCount
 *   directories in inDirs, read them in groups of round(sqrt(fileCount)) files,
 *   sort each group with pslCmpQuery and write it to a mid file named by
 *   makeMidName() under tempDir.
 *   Step 2 (skipped when command ends in "1"): call pslSort2(outFile, tempDir).
 * command must start with "dirs" or "g2g"; anything else calls usage().
 * With "g2g", '+' strand records whose qName equals tName and whose qStart
 * equals tStart are dropped, and every kept record read from a file for which
 * selfFile() is false is also added in the form produced by mirrorLmPsl(). */
{
int i;
struct slName *fileList = NULL, *name;
char *inDir;
struct slName *dirDir, *dirFile;
char fileName[512];
int fileCount;
int totalFilesProcessed = 0;
int filesPerMidFile;
int midFileCount = 0;
FILE *f;
struct lineFile *lf;
boolean doReflect = FALSE;
boolean suppressSelf = FALSE;
boolean firstOnly = endsWith(command, "1");
boolean secondOnly = endsWith(command, "2");

if (startsWith("dirs", command))
    ;
else if (startsWith("g2g", command))
    {
    doReflect = TRUE;
    suppressSelf = TRUE;
    }
else
    usage();


if (!secondOnly)
    {
    makeDir(tempDir);
    /* Figure out how many files to process. */
    for (i=0; i<inDirCount; ++i)
        {
        inDir = inDirs[i];
        dirDir = listDir(inDir, "*.psl");
        if (slCount(dirDir) == 0)
            dirDir = listDir(inDir, "*.psl.gz");
        if (slCount(dirDir) == 0)
            errAbort("No psl files in %s\n", inDir);
        verbose(1, "%s with %d files\n", inDir, slCount(dirDir));
        for (dirFile = dirDir; dirFile != NULL; dirFile = dirFile->next)
            {
            /* Bounded formatting: abort rather than overflow fileName when the
             * directory plus file name does not fit. */
            int len = snprintf(fileName, sizeof(fileName), "%s/%s", inDir, dirFile->name);
            if (len < 0 || len >= (int)sizeof(fileName))
                errAbort("Path %s/%s is too long\n", inDir, dirFile->name);
            name = newSlName(fileName);
            slAddHead(&fileList, name);
            }
        slFreeList(&dirDir);
        }
    verbose(1, "%d files in %d dirs\n", slCount(fileList), inDirCount);
    slReverse(&fileList);
    fileCount = slCount(fileList);
    /* Group size is not capped; an earlier cap of 20 files per mid file was
     * disabled in the original code. */
    filesPerMidFile = round(sqrt(fileCount));
    verbose(1, "Got %d files %d files per mid file\n", fileCount, filesPerMidFile);

    /* Read in files a group at a time, sort, and write merged, sorted
     * output of one group. */
    name = fileList;
    while (totalFilesProcessed < fileCount)
        {
        int filesInMidFile = 0;
        struct psl *pslList = NULL, *psl;
        int lfileCount = 0;
        /* All psl records of this group are allocated from lm and released
         * together by lmCleanup() below. */
        struct lm *lm = lmInit(256*1024);

        for (filesInMidFile = 0; filesInMidFile < filesPerMidFile && name != NULL;
            ++filesInMidFile, ++totalFilesProcessed, name = name->next)
            {
            boolean reflectMe = FALSE;
            if (doReflect)
                {
                reflectMe = !selfFile(name->name);
                }
            verbose(2, "Reading %s (%d of %d)\n", name->name, totalFilesProcessed+1, fileCount);
            lf = pslFileOpen(name->name);
            while ((psl = nextLmPsl(lf, lm)) != NULL)
                {
                if (psl->qStart == psl->tStart && psl->strand[0] == '+' && 
                    suppressSelf && sameString(psl->qName, psl->tName))
                    {
                    continue;
                    }
                ++lfileCount;
                slAddHead(&pslList, psl);
                if (reflectMe)
                    {
                    psl = mirrorLmPsl(psl, lm);
                    slAddHead(&pslList, psl);
                    }
                }
            lineFileClose(&lf);
            }
        slSort(&pslList, pslCmpQuery);
        makeMidName(tempDir, midFileCount, fileName);
        verbose(1, "Writing %s\n", fileName);
        f = mustOpen(fileName, "w");
        if (!nohead)
            pslWriteHead(f);
        for (psl = pslList; psl != NULL; psl = psl->next)
            {
            pslTabOut(psl, f);
            }
        /* fclose flushes buffered output; a failure here means the mid file
         * is incomplete, so do not carry on to the merge step. */
        if (fclose(f) != 0)
            errAbort("Error closing %s\n", fileName);
        pslList = NULL;
        lmCleanup(&lm);
        verbose(2, "lfileCount %d\n", lfileCount);
        ++midFileCount;
        }
    slFreeList(&fileList);
    }
if (!firstOnly)
    pslSort2(outFile, tempDir);
}
Exemplo n.º 2
0
int netConnectHttps(char *hostName, int port)
/* Start https connection with server or die.
 * This version makes no connection attempt: it immediately calls errAbort()
 * with a "No openssl available" message that names hostName and port. */
{
errAbort("No openssl available in netConnectHttps for %s : %d", hostName, port);
return -1;   /* will never get to here, make compiler happy */
}
Exemplo n.º 3
0
void usage() 
/* Report the vulgarToPsl description and command-line synopsis via errAbort(). */
{
errAbort("vulgarToPsl - Convert the vulgar exonerate format to PSL.\n"
	 "usage:\n"
	 "   vulgarToPsl input.vul proteinQ.fa dnaT.fa output.psl");
}
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 * Reads fileName into a chromGraph list (through the -idTable table when that
 * option is given), optionally converts values to -log10 (-minusLog10), sorts
 * the list with chromGraphCmp and writes it to a tab file for track in the
 * current directory.  When doLoad is set it also (re)creates and loads table
 * track in db, writes <track>.cgb with chromGraphToBin(), and replaces the
 * track's row in metaChromGraph with the min/max values and the .cgb path
 * (under -pathPrefix, defaulting to /gbdb/<db>/chromGraph). */
{
double minVal,maxVal;
struct chromGraph *el, *list;
FILE *f;
char *tempDir = ".";
char path[PATH_LEN], gbdbPath[PATH_LEN];
char *idTable = optionVal("idTable", NULL);
char *pathPrefix = NULL;
boolean minusLog10 = optionExists("minusLog10");

if (idTable == NULL)
    list = chromGraphLoadAll(fileName);
else 
    list = chromGraphListWithTable(fileName, db, idTable);
if (list == NULL)
    errAbort("%s is empty", fileName);

/* Apply the -minusLog10 transform to every element, including the first one.
 * A value of exactly 1 maps to 0; values <= 0 are left unchanged. */
if (minusLog10)
    {
    for (el = list; el != NULL; el = el->next)
        {
        if (el->val == 1)
            el->val = 0;
        else if (el->val > 0)
            el->val = -1 * log(el->val)/log(10);
        }
    }

/* Figure out min/max values (after any transform, so they describe the
 * values that are actually stored). */
minVal = maxVal = list->val;
for (el = list->next; el != NULL; el = el->next)
    {
    if (el->val < minVal)
        minVal = el->val;
    if (el->val > maxVal)
        maxVal = el->val;
    }


/* Sort and write out temp file. */
slSort(&list, chromGraphCmp);
f = hgCreateTabFile(tempDir, track);
for (el = list; el != NULL; el = el->next)
    chromGraphTabOut(el, f);

if (doLoad)
    {
    struct dyString *dy = dyStringNew(0);
    struct sqlConnection *conn;

    /* Set up connection to database and create main table. */
    conn = hAllocConn(db);
    dyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
    sqlRemakeTable(conn, track, dy->string);

    /* Load main table and clean up file handle. */
    hgLoadTabFile(conn, tempDir, track, &f);
    hgRemoveTabFile(tempDir, track);

    /* If need be create meta table.  If need be delete old row. */
    /* NOTE(review): track and gbdbPath are pasted into SQL text unescaped --
     * confirm they only ever come from trusted command-line input. */
    if (!sqlTableExists(conn, "metaChromGraph"))
        sqlUpdate(conn, metaCreateString);
    else
        {
        dyStringClear(dy);
        dyStringPrintf(dy, "delete from metaChromGraph where name = '%s'", 
                track);
        sqlUpdate(conn, dy->string);
        }

    /* Make chrom graph file */
    safef(path, sizeof(path), "%s.cgb", track);
    chromGraphToBin(list, path);
    safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
    pathPrefix = optionVal("pathPrefix", path);
    safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

    /* Create new line in meta table */
    dyStringClear(dy);
    dyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
        track, minVal, maxVal, gbdbPath);
    sqlUpdate(conn, dy->string);
    }
else
    {
    /* Nothing else will close the tab file on this path; close it here so
     * the rows are flushed and the handle is not leaked. */
    if (fclose(f) != 0)
        errAbort("Error closing tab file for %s", track);
    }
}
Exemplo n.º 5
0
/* entry */
int main(int argc, char** argv)
/* Parse the command line into the file-level option variables, reject
 * incompatible option combinations with errAbort(), then run
 * overlapSelect(selectFile, inFile, outFile, dropFile).
 * Expects exactly three positional arguments: selectFile inFile outFile. */
{
char *selectFile, *inFile, *outFile, *dropFile;
optionInit(&argc, argv, optionSpecs);
if (argc != 4)
    usage("wrong # args");
selectFile = argv[1];
inFile = argv[2];
outFile = argv[3];

/* select file options: explicit format, coordinate columns, or a format
 * obtained from getFileFormat() on the file name */
if (optionExists("selectFmt") && optionExists("selectCoordCols"))
    errAbort("can't specify both -selectFmt and -selectCoordCols");

if (optionExists("selectFmt"))
    selectFmt = parseFormatSpec(optionVal("selectFmt", NULL));
else if (optionExists("selectCoordCols"))
    {
    selectCoordCols = coordColsParseSpec("selectCoordCols",
                                         optionVal("selectCoordCols", NULL));
    selectFmt = COORD_COLS_FMT;
    }
else
    selectFmt = getFileFormat(selectFile);

if (optionExists("selectCds"))
    selectCaOpts |= chromAnnCds;
if (optionExists("selectRange"))
    selectCaOpts |= chromAnnRange;
if ((selectFmt == PSLQ_FMT) || (selectFmt == CHAINQ_FMT))
    selectCaOpts |= chromAnnUseQSide;

/* in file options: same scheme as for the select file */
if (optionExists("inFmt") && optionExists("inCoordCols"))
    errAbort("can't specify both -inFmt and -inCoordCols");
if (optionExists("inFmt"))
    inFmt = parseFormatSpec(optionVal("inFmt", NULL));
else if (optionExists("inCoordCols"))
    {
    inCoordCols = coordColsParseSpec("inCoordCols",
                                     optionVal("inCoordCols", NULL));
    inFmt = COORD_COLS_FMT;
    }
else
    inFmt = getFileFormat(inFile);

inCaOpts = chromAnnSaveLines; // need lines for output
if (optionExists("inCds"))
    inCaOpts |= chromAnnCds;
if (optionExists("inRange"))
    inCaOpts |= chromAnnRange;
if ((inFmt == PSLQ_FMT) || (inFmt == CHAINQ_FMT))
    inCaOpts |= chromAnnUseQSide;

/* select options */
useAggregate = optionExists("aggregate");
nonOverlapping = optionExists("nonOverlapping");
if (optionExists("strand") && optionExists("oppositeStrand"))
    errAbort("can only specify one of -strand and -oppositeStrand");
if (optionExists("strand"))
    selectOpts |= selStrand;
if (optionExists("oppositeStrand"))
    selectOpts |= selOppositeStrand;
if (optionExists("excludeSelf") && (optionExists("idMatch")))
    errAbort("can't specify both -excludeSelf and -idMatch");
if (optionExists("excludeSelf"))
    selectOpts |= selExcludeSelf;
if (optionExists("idMatch"))
    selectOpts |= selIdMatch;

criteria.threshold = optionFloat("overlapThreshold", 0.0);
criteria.thresholdCeil = optionFloat("overlapThresholdCeil", 1.1);
criteria.similarity = optionFloat("overlapSimilarity", 0.0);
criteria.similarityCeil = optionFloat("overlapSimilarityCeil", 1.1);
criteria.bases = optionInt("overlapBases", -1);

/* output options: at most one output mode; all three -statsOutput* flags
 * count as the single "stats" mode */
mergeOutput = optionExists("mergeOutput");
idOutput = optionExists("idOutput");
statsOutput = optionExists("statsOutput") || optionExists("statsOutputAll") || optionExists("statsOutputBoth");
if ((mergeOutput + idOutput + statsOutput) > 1)
    errAbort("can only specify one of -mergeOutput, -idOutput, -statsOutput, -statsOutputAll, or -statsOutputBoth");
outputAll = optionExists("statsOutputAll");
outputBoth = optionExists("statsOutputBoth");
if (outputBoth)
    outputAll = TRUE;
if (mergeOutput)
    {
    if (nonOverlapping)
        errAbort("can't use -mergeOutput with -nonOverlapping");
    if (useAggregate)
        errAbort("can't use -mergeOutput with -aggregate");
    /* This check used to be guarded by a second "if (useAggregate)", which
     * can never be true here because of the abort just above, so chain
     * input was silently accepted. */
    if ((selectFmt == CHAIN_FMT) || (selectFmt == CHAINQ_FMT)
        || (inFmt == CHAIN_FMT) || (inFmt == CHAINQ_FMT))
        errAbort("can't use -mergeOutput with chains");
    selectCaOpts |= chromAnnSaveLines;
    }
dropFile = optionVal("dropped", NULL);

/* check for options incompatible with aggregate mode; aggIncompatible is a
 * NULL-terminated list of option names */
if (useAggregate)
    {
    int i;
    for (i = 0; aggIncompatible[i] != NULL; i++)
        {
        if (optionExists(aggIncompatible[i]))
            errAbort("-%s is not allowed with -aggregate", aggIncompatible[i]);
        }
    }

overlapSelect(selectFile, inFile, outFile, dropFile);
return 0;
}
static void processMrnaFa(struct sqlConnection *conn, int taxon, char *type, char *db)
/* Walk the FASTA records of "mrna.fa".  For each record, keep fetching a vgPrb
 * id whose taxon and type match, whose tName is the record name and whose
 * state is 'new', until none is left.  For each such row: if
 * findVgPrbBySeq() returns 0 for the record's sequence, store the sequence
 * and db in the row and move it to state 'seq'; otherwise re-point vgPrbMap
 * rows at the id that was returned and delete the 'new' row. */
{
struct dyString *query = dyStringNew(0);
struct lineFile *fa = lineFileOpen("mrna.fa", TRUE);
int textSize;
char *text;
boolean haveLine = lineFileNext(fa, &text, &textSize);

while (haveLine)
    {
    char *recName, *seq;

    /* Every pass through this loop must start on a '>' header line. */
    if (text[0] != '>')
        errAbort("unexpected error out of phase\n");
    recName = cloneString(text+1);
    verbose(2,"name=%s\n",recName);

    /* Concatenate sequence lines up to the next header or end of file. */
    dyStringClear(query);
    for (;;)
        {
        haveLine = lineFileNext(fa, &text, &textSize);
        if (!haveLine || text[0] == '>')
            break;
        dyStringAppend(query,text);
        }
    seq = cloneString(query->string);

    for (;;)
        {
        int newRowId, existingId;

        dyStringClear(query);
        dyStringPrintf(query, "select id from vgPrb "
           "where taxon=%d and type='%s' and tName='%s' and state='new'",taxon,type,recName);
        newRowId = sqlQuickNum(conn,query->string);
        if (newRowId == 0)
            break;       /* no more records match */

        /* A row in state 'new' exists; see whether the sequence is already stored. */
        existingId = findVgPrbBySeq(conn,seq,taxon);

        if (existingId != 0)
            {
            /* Sequence already present: re-map the probe table recs to it ... */
            dyStringClear(query);
            dyStringPrintf(query, "update vgPrbMap set vgPrb=%d where vgPrb=%d",existingId,newRowId);
            sqlUpdate(conn, query->string);
            /* ... and drop the now redundant row from vgPrb. */
            dyStringClear(query);
            dyStringPrintf(query, "delete from vgPrb where id=%d",newRowId);
            sqlUpdate(conn, query->string);
            }
        else
            {
            /* First time this sequence is seen: fill in the 'new' row. */
            dyStringClear(query);
            dyStringAppend(query, "update vgPrb set");
            dyStringAppend(query, " seq = '");
            dyStringAppend(query, seq);
            dyStringAppend(query, "',\n");
            dyStringPrintf(query, " db = '%s',\n", db);
            dyStringAppend(query, " state = 'seq'\n");
            dyStringPrintf(query, " where id=%d\n", newRowId);
            dyStringPrintf(query, " and state='%s'\n", "new");
            verbose(2, "%s\n", query->string);
            sqlUpdate(conn, query->string);
            }
        }

    freez(&recName);
    freez(&seq);
    }
lineFileClose(&fa);

dyStringFree(&query);
}
Exemplo n.º 7
0
struct annoStreamer *annoStreamDbNew(char *db, char *table, struct annoAssembly *aa,
				     struct asObject *asObj, int maxOutRows)
/* Create an annoStreamer (subclass) object from a database table described by asObj. */
{
struct sqlConnection *conn = hAllocConn(db);
if (!sqlTableExists(conn, table))
    errAbort("annoStreamDbNew: table '%s' doesn't exist in database '%s'", table, db);
struct annoStreamDb *self = NULL;
AllocVar(self);
struct annoStreamer *streamer = &(self->streamer);
int dbtLen = strlen(db) + strlen(table) + 2;
char dbTable[dbtLen];
safef(dbTable, dbtLen, "%s.%s", db, table);
annoStreamerInit(streamer, aa, asObj, dbTable);
streamer->rowType = arWords;
streamer->setRegion = asdSetRegion;
streamer->nextRow = asdNextRow;
streamer->close = asdClose;
self->conn = conn;
self->table = cloneString(table);
char *asFirstColumnName = streamer->asObj->columnList->name;
if (sqlFieldIndex(self->conn, self->table, "bin") == 0)
    {
    self->hasBin = 1;
    self->minFinestBin = binFromRange(0, 1);
    }
if (self->hasBin && !sameString(asFirstColumnName, "bin"))
    self->omitBin = 1;
if (!asdInitBed3Fields(self))
    errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as "
	     "{chrom, chromStart, chromEnd}.", db, table);
self->makeBaselineQuery = asdMakeBaselineQuery;
// When a table has an index on endField, sometimes the query optimizer uses it
// and that ruins the sorting.  Fortunately most tables don't anymore.
self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField);
self->notSorted = FALSE;
// Special case: genbank-updated tables are not sorted because new mappings are
// tacked on at the end.
if (isIncrementallyUpdated(table))
    self->notSorted = TRUE;
self->mergeBins = FALSE;
self->maxOutRows = maxOutRows;
self->useMaxOutRows = (maxOutRows > 0);
self->needQuery = TRUE;
self->chromList = annoAssemblySeqNames(aa);
if (slCount(self->chromList) > 1000)
    {
    // Assembly has many sequences (e.g. scaffold-based assembly) --
    // don't break up into per-sequence queries.  Take our chances
    // with mysql being unhappy about the sqlResult being open too long.
    self->doQuery = asdDoQuerySimple;
    self->nextRowRaw = nextRowFromSqlResult;
    }
else
    {
    // All-chromosome assembly -- if table is large, perform a series of
    // chunked queries.
    self->doQuery = asdDoQueryChunking;
    self->nextRowRaw = nextRowFromBuffer;
    }
return (struct annoStreamer *)self;
}
Exemplo n.º 8
0
struct gapCalc *gapCalcRead(struct lineFile *lf)
/* Create gapCalc from open file. */
{
    int i, tableSize, startLong = -1;
    struct gapCalc *gapCalc;
    int *gapInitPos;
    double *gapInitQGap;
    double *gapInitTGap;
    double *gapInitBothGap;

    AllocVar(gapCalc);

    /* Parse file. */
    readTaggedNumLine(lf, "tableSize", 1, &tableSize, NULL);
    readTaggedNumLine(lf, "smallSize", 1, &gapCalc->smallSize, NULL);
    AllocArray(gapInitPos,tableSize);
    AllocArray(gapInitQGap,tableSize);
    AllocArray(gapInitTGap,tableSize);
    AllocArray(gapInitBothGap,tableSize);
    readTaggedNumLine(lf, "position", tableSize, gapInitPos, NULL);
    readTaggedNumLine(lf, "qGap", tableSize, NULL, gapInitQGap);
    readTaggedNumLine(lf, "tGap", tableSize, NULL, gapInitTGap);
    readTaggedNumLine(lf, "bothGap", tableSize, NULL, gapInitBothGap);

    /* Set up precomputed interpolations for small gaps. */
    AllocArray(gapCalc->qSmall, gapCalc->smallSize);
    AllocArray(gapCalc->tSmall, gapCalc->smallSize);
    AllocArray(gapCalc->bSmall, gapCalc->smallSize);
    for (i=1; i<gapCalc->smallSize; ++i)
    {
        gapCalc->qSmall[i] =
            interpolate(i, gapInitPos, gapInitQGap, tableSize);
        gapCalc->tSmall[i] =
            interpolate(i, gapInitPos, gapInitTGap, tableSize);
        gapCalc->bSmall[i] = interpolate(i, gapInitPos,
                                         gapInitBothGap, tableSize);
    }

    /* Set up to handle intermediate values. */
    for (i=0; i<tableSize; ++i)
    {
        if (gapCalc->smallSize == gapInitPos[i])
        {
            startLong = i;
            break;
        }
    }
    if (startLong < 0)
        errAbort("No position %d in gapCalcRead()\n", gapCalc->smallSize);
    gapCalc->longCount = tableSize - startLong;
    gapCalc->qPosCount = tableSize - startLong;
    gapCalc->tPosCount = tableSize - startLong;
    gapCalc->bPosCount = tableSize - startLong;
    gapCalc->longPos = cloneMem(gapInitPos + startLong, gapCalc->longCount * sizeof(int));
    gapCalc->qLong = cloneMem(gapInitQGap + startLong, gapCalc->qPosCount * sizeof(double));
    gapCalc->tLong = cloneMem(gapInitTGap + startLong, gapCalc->tPosCount * sizeof(double));
    gapCalc->bLong = cloneMem(gapInitBothGap + startLong, gapCalc->bPosCount * sizeof(double));

    /* Set up to handle huge values. */
    gapCalc->qLastPos = gapCalc->longPos[gapCalc->qPosCount-1];
    gapCalc->tLastPos = gapCalc->longPos[gapCalc->tPosCount-1];
    gapCalc->bLastPos = gapCalc->longPos[gapCalc->bPosCount-1];
    gapCalc->qLastPosVal = gapCalc->qLong[gapCalc->qPosCount-1];
    gapCalc->tLastPosVal = gapCalc->tLong[gapCalc->tPosCount-1];
    gapCalc->bLastPosVal = gapCalc->bLong[gapCalc->bPosCount-1];
    gapCalc->qLastSlope = calcSlope(gapCalc->qLastPosVal, gapCalc->qLong[gapCalc->qPosCount-2],
                                    gapCalc->qLastPos, gapCalc->longPos[gapCalc->qPosCount-2]);
    gapCalc->tLastSlope = calcSlope(gapCalc->tLastPosVal, gapCalc->tLong[gapCalc->tPosCount-2],
                                    gapCalc->tLastPos, gapCalc->longPos[gapCalc->tPosCount-2]);
    gapCalc->bLastSlope = calcSlope(gapCalc->bLastPosVal, gapCalc->bLong[gapCalc->bPosCount-2],
                                    gapCalc->bLastPos, gapCalc->longPos[gapCalc->bPosCount-2]);
    freez(&gapInitPos);
    freez(&gapInitQGap);
    freez(&gapInitTGap);
    freez(&gapInitBothGap);
    return gapCalc;
}
Exemplo n.º 9
0
void loadOneBed(struct lineFile *lf, int bedSize, struct bedStub **pList)
/* Load one bed file.  Make sure all lines have the correct number of fields.
 * Put results in *pList.
 * Each kept line becomes a bedStub holding chrom, chromStart, chromEnd and a
 * copy of the line text (after the leading bin word has been skipped when
 * hasBin is set).  Stubs are added at the head of *pList, so the list ends up
 * in reverse file order.  When type is set, each line is also run through
 * loadAndValidateBed() and checkChromNameAndSize().  Unless noStrict is set,
 * the start/end sanity checks below abort on the first bad line. */
{
char *words[64], *line, *dupe;
int wordCount;
struct bedStub *bed;
struct asObject *asObj = getAsObj(bedSize);
int fieldCount = getFieldCount(bedSize, asObj);
struct bed *validateBed;
AllocVar(validateBed);

verbose(1, "Reading %s\n", lf->fileName);
while (lineFileNextReal(lf, &line))
    {
    if (hasBin)
        nextWord(&line);
    /* Keep an unchopped copy: the chop calls below are applied to line
     * itself, and the copy is what gets stored in the stub. */
    dupe = cloneString(line);
    if (strictTab)
        wordCount = chopTabs(line, words);
    else
        wordCount = chopLine(line, words);
    /* ignore empty lines, releasing the copy that will not be stored */
    if (0 == wordCount)
        {
        freez(&dupe);
        continue;
        }
    lineFileExpectWords(lf, fieldCount, wordCount);

    if (type)  
        // TODO also, may need to add a flag to the validateBed() interface to support -allowNegativeScores when not isCt
        //  although can probably get away without it since usually -allowNegativeScores is used by ct which has already verified it.
        //  thus -allowNegativeScores is unlikely to be used with -type.
        {
        loadAndValidateBed(words, typeBedN, fieldCount, lf, validateBed, asObj, FALSE);
        checkChromNameAndSize(lf, validateBed->chrom, validateBed->chromEnd);
        }

    AllocVar(bed);
    bed->chrom = cloneString(words[0]);
    bed->chromStart = lineFileNeedNum(lf, words, 1);
    bed->chromEnd = lineFileNeedNum(lf, words, 2);
    if (! noStrict)
        {
        if ((bed->chromEnd < 1) && !allowStartEqualEnd)
            errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n",
                     lf->lineIx, dupe);
        if (bed->chromStart == bed->chromEnd && !allowStartEqualEnd)
            errAbort("ERROR: line %d:'%s'\nchromStart == chromEnd (%d) (zero-length item)\n"
                     "Use -allowStartEqualEnd if that is legit (e.g. for insertion point).\n",
                     lf->lineIx, dupe, bed->chromStart);
        if (bed->chromStart > bed->chromEnd)
            errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n",
                     lf->lineIx, dupe, bed->chromStart, bed->chromEnd);
        }
    /* The stub takes over the copy of the line. */
    bed->line = dupe;
    slAddHead(pList, bed);
    }

if (asObj)
    asObjectFreeList(&asObj);
freez(&validateBed);
}
struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName)
/* Open a bigWig or bigBed file through udc and return a bbiFile for it.
 * sig is the expected magic number and typeName is only used in the error
 * message when the file's magic number matches sig neither as read nor
 * byte-swapped.  On return the fixed header fields and the zoom level list are
 * filled in and the chromosome B+ tree is attached. */
{
/* This code needs to agree with code in two other places currently - bigBedFileCreate,
 * and bigWigFileCreate.  I'm thinking of refactoring to share at least between
 * bigBedFileCreate and bigWigFileCreate.  It'd be great so it could be structured
 * so that it could send the input in one chromosome at a time, and send in the zoom
 * stuff only after all the chromosomes are done.  This'd potentially reduce the memory
 * footprint by a factor of 2 or 4.  Still, for now it works. -JK */
struct bbiFile *bbi;
AllocVar(bbi);
bbi->fileName = cloneString(fileName);
struct udcFile *udc = udcFileOpen(fileName, udcDefaultDir());
bbi->udc = udc;

/* The magic number tells us both whether this is the right kind of file and
 * whether the rest of it must be byte-swapped. */
bits32 magic;
udcMustRead(udc, &magic, sizeof(magic));
boolean isSwapped = (magic != sig);
if (isSwapped)
    {
    magic = byteSwap32(magic);
    if (magic != sig)
       errAbort("%s is not a %s file", fileName, typeName);
    }
bbi->typeSig = sig;
bbi->isSwapped = isSwapped;

/* Fixed header fields, in file order. */
bbi->version = udcReadBits16(udc, isSwapped);
bbi->zoomLevels = udcReadBits16(udc, isSwapped);
bbi->chromTreeOffset = udcReadBits64(udc, isSwapped);
bbi->unzoomedDataOffset = udcReadBits64(udc, isSwapped);
bbi->unzoomedIndexOffset = udcReadBits64(udc, isSwapped);
bbi->fieldCount = udcReadBits16(udc, isSwapped);
bbi->definedFieldCount = udcReadBits16(udc, isSwapped);
bbi->asOffset = udcReadBits64(udc, isSwapped);
bbi->totalSummaryOffset = udcReadBits64(udc, isSwapped);
bbi->uncompressBufSize = udcReadBits32(udc, isSwapped);

/* Jump past the reserved area to offset 64, where the zoom headers begin. */
udcSeek(udc, 64);

/* One zoom header per level; the list is built head-first and then
 * reversed so it ends up in file order. */
struct bbiZoomLevel *zoomList = NULL;
int levelIx = 0;
while (levelIx < bbi->zoomLevels)
    {
    struct bbiZoomLevel *zoom;
    AllocVar(zoom);
    zoom->reductionLevel = udcReadBits32(udc, isSwapped);
    zoom->reserved = udcReadBits32(udc, isSwapped);
    zoom->dataOffset = udcReadBits64(udc, isSwapped);
    zoom->indexOffset = udcReadBits64(udc, isSwapped);
    slAddHead(&zoomList, zoom);
    ++levelIx;
    }
slReverse(&zoomList);
bbi->levelList = zoomList;

/* Attach B+ tree of chromosome names and ids. */
udcSeek(udc, bbi->chromTreeOffset);
bbi->chromBpt =  bptFileAttach(fileName, udc);

return bbi;
}
Exemplo n.º 11
0
boolean asCompareObjs(char *name1, struct asObject *as1, char *name2, struct asObject *as2, int numColumnsToCheck,
                      int *retNumColumnsSame, boolean abortOnDifference)
/* Compare the first numColumnsToCheck columns of as1 and as2 (labelled name1
 * and name2 in messages) for matching name, isSizeLink, isList, isArray, low
 * type name, fixedSize and linkedSizeName, after first comparing the objects'
 * isTable and isSimple flags.  The first difference is reported through
 * verbose() (level 1 when abortOnDifference, else level 2) and stops the scan.
 * If a difference was found and abortOnDifference is set, errAbort; otherwise
 * return TRUE when no difference was found.  Also aborts when the objects are
 * not tables or when either has fewer than numColumnsToCheck columns.
 * If retNumColumnsSame is not NULL, it receives the number of leading columns
 * that matched. */
{
    boolean mismatch = FALSE;
    struct asColumn *colA = as1->columnList, *colB = as2->columnList;
    int matched = 0;
    int level = abortOnDifference ? 1 : 2;

    if (as1->isTable != as2->isTable)
    {
        verbose(level,"isTable does not match: %s=[%d]  %s=[%d]", name1, as1->isTable, name2, as2->isTable);
        mismatch = TRUE;
    }
    else if (as1->isSimple != as2->isSimple)
    {
        verbose(level,"isSimple does not match: %s=[%d]  %s=[%d]", name1, as1->isSimple, name2, as2->isSimple);
        mismatch = TRUE;
    }
    else
    {
        if (!as1->isTable)
        {
            errAbort("asCompareObjLists only supports Table .as objects at this time.");
        }

        /* Walk both column lists in step.  Each test below reports and
         * leaves the loop at the first property that differs, so 'matched'
         * only counts columns that passed every test. */
        colA = as1->columnList;
        colB = as2->columnList;
        while (colA != NULL && colB != NULL && matched < numColumnsToCheck)
        {
            int colNum = matched + 1;   /* 1-based column number for messages */

            if (!sameOk(colA->name, colB->name))
            {
                verbose(level,"column #%d names do not match: %s=[%s]  %s=[%s]\n"
                        , colNum, name1, colA->name, name2, colB->name);
                mismatch = TRUE;
                break;
            }
            if (colA->isSizeLink != colB->isSizeLink)
            {
                verbose(level,"column #%d isSizeLink do not match: %s=[%d]  %s=[%d]\n"
                        , colNum, name1, colA->isSizeLink, name2, colB->isSizeLink);
                mismatch = TRUE;
                break;
            }
            if (colA->isList != colB->isList)
            {
                verbose(level,"column #%d isList do not match: %s=[%d]  %s=[%d]\n"
                        , colNum, name1, colA->isList, name2, colB->isList);
                mismatch = TRUE;
                break;
            }
            if (colA->isArray != colB->isArray)
            {
                verbose(level,"column #%d isArray do not match: %s=[%d]  %s=[%d]\n"
                        , colNum, name1, colA->isArray, name2, colB->isArray);
                mismatch = TRUE;
                break;
            }
            if (!sameOk(colA->lowType->name, colB->lowType->name))
            {
                verbose(level,"column #%d type names do not match: %s=[%s]  %s=[%s]\n"
                        , colNum, name1, colA->lowType->name, name2, colB->lowType->name);
                mismatch = TRUE;
                break;
            }
            if (colA->fixedSize != colB->fixedSize)
            {
                verbose(level,"column #%d fixedSize do not match: %s=[%d]  %s=[%d]\n"
                        , colNum, name1, colA->fixedSize, name2, colB->fixedSize);
                mismatch = TRUE;
                break;
            }
            if (!sameOk(colA->linkedSizeName, colB->linkedSizeName))
            {
                verbose(level,"column #%d linkedSizeName do not match: %s=[%s]  %s=[%s]\n"
                        , colNum, name1, colA->linkedSizeName, name2, colB->linkedSizeName);
                mismatch = TRUE;
                break;
            }

            colA = colA->next;
            colB = colB->next;
            ++matched;
        }

        /* Running out of columns without a mismatch means a list was
         * shorter than the caller asked us to check. */
        if (!mismatch && matched < numColumnsToCheck)
            errAbort("Unexpected error in asCompareObjLists: asked to compare %d columns in %s and %s, but only found %d in one or both asObjects."
                     , numColumnsToCheck, name1, name2, matched);
    }

    if (mismatch)
    {
        if (abortOnDifference)
            errAbort("asObjects differ.");
        else
            verbose(level,"asObjects differ. Matching field count=%d\n", matched);
    }
    if (retNumColumnsSame)
        *retNumColumnsSame = matched;
    return (!mismatch);
}
Exemplo n.º 12
0
void makeConfigFromEncodeList(char *input, char *output)
/* create config file for hgBedsToBedExps from tab-separated file of format
 *         <relDate> <fileName> <fileSize> <submitDate> <metadata>
 * For every input line the "cell" and "antibody" values are pulled out of the
 * metadata and one tab-separated line is written to output:
 *   antibody, cell, cellAbbreviation(cell), "file", scoreCol-1,
 *   calcNormScoreFactor(fileName, scoreCol-1), fileName
 * Aborts on a line with no metadata, a metadata item without '=', or a line
 * that lacks a cell or an antibody tag. */
{
FILE *f = mustOpen(output, "w");
struct lineFile *lf = lineFileOpen(input, TRUE);
char *line;

while (lineFileNextReal(lf, &line))
    {
    /* Parse out line into major components. */
    char *releaseDate = nextWord(&line);
    char *fileName = nextWord(&line);
    char *fileSize = nextWord(&line);
    char *submitDate = nextWord(&line);
    char *metadata = trimSpaces(line);
    if (isEmpty(metadata))
        errAbort("line %d of %s is truncated", lf->lineIx, lf->fileName);

    verbose(2, "releaseDate=%s; fileName=%s; fileSize=%s; submitDate=%s; %s\n", 
        releaseDate, fileName, fileSize, submitDate, metadata);


    /* Loop through metadata looking for cell and antibody.  Metadata
     * is in format this=that; that=two words; that=whatever */
    char *cell = NULL, *antibody = NULL;
    for (;;)
        {
        /* Find terminating semicolon if any replace it with zero, and
         * note position for next time around loop. */
        metadata = skipLeadingSpaces(metadata);
        if (isEmpty(metadata))
            break;
        char *semi = strchr(metadata, ';');
        if (semi != NULL)
           *semi++ = 0;

        /* Parse out name/value pair. */
        char *name = metadata;
        char *value = strchr(metadata, '=');
        if (value == NULL)
           errAbort("Missing '=' in metadata after tag %s in line %d of %s", 
                name, lf->lineIx, lf->fileName);
        *value++ = 0;
        name = trimSpaces(name);
        value = trimSpaces(value);

        /* Look for our tags.  cell and antibody point into the line buffer,
         * which is why they are used before the next line is read. */
        if (sameString(name, "cell"))
            cell = value;
        else if (sameString(name, "antibody"))
            antibody = value;

        /* semi is NULL after the last item, which ends the loop above. */
        metadata = semi;
        }
    if (cell == NULL) 
        errAbort("No cell in metadata line %d of %s", lf->lineIx, lf->fileName);
    if (antibody == NULL) 
        errAbort("No antibody in metadata line %d of %s", lf->lineIx, lf->fileName);

    fprintf(f, "%s\t%s\t", antibody, cell);
    fprintf(f, "%s\t", cellAbbreviation(cell));
    fprintf(f, "file\t%d\t", scoreCol-1);
    fprintf(f, "%g", calcNormScoreFactor(fileName, scoreCol-1));
    fprintf(f, "\t%s\n", fileName);
    }
/* Release the input reader as well as the output file. */
lineFileClose(&lf);
carefulClose(&f);
}
Exemplo n.º 13
0
/* Format into the fixed-size char array 'buf', aborting instead of overflowing
 * or silently truncating.  'buf' must be a true array (not a pointer) because
 * its capacity is taken with sizeof. */
#define formatOrAbort(buf, ...) \
    do \
        { \
        int len_ = snprintf((buf), sizeof(buf), __VA_ARGS__); \
        if (len_ < 0 || len_ >= (int)sizeof(buf)) \
            errAbort("Formatted text does not fit in %d byte buffer %s", \
                (int)sizeof(buf), #buf); \
        } \
    while (0)

void fakeFinContigs(char *agpName, char *faName, char *finDir, char *rootName, char *finFaDir, char *ooVer)
/* fakeFinContigs - Fake up contigs for a finished chromosome.
 *   agpName  - AGP file to scan; non-gap lines must have 9 words.
 *   faName   - fasta file that must contain exactly one sequence.
 *   finDir   - directory that receives one subdirectory per contig plus lift/.
 *   rootName - prefix for generated contig names (rootName1, rootName2, ...).
 *   finFaDir - not referenced by this function.
 *   ooVer    - version string used in the .gl, barge and gold file names.
 * A line whose fifth word starts with 'N' or 'U' ends the current contig; the
 * next fragment line starts a new one.  Aborts on malformed input, on contigs
 * that extend past the loaded sequence, and on names too long for the
 * fixed-size buffers used here. */
{
struct contig *contigList = NULL, *contig = NULL;
struct agpFrag *agp;
struct lineFile *lf = lineFileOpen(agpName, TRUE);
char *line, *words[16];
int lineSize, wordCount;
int contigIx = 0;
char liftDir[512], contigDir[512], path[512];
char chrom[128];
FILE *f;
struct dnaSeq *seq;
int fragIx;

/* Build up contig list by scanning agp file. */
printf("Reading %s\n", lf->fileName);
while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] == '#' || line[0] == 0)
        continue;
    wordCount = chopLine(line, words);
    if (wordCount < 5)
        errAbort("Expecting at least 5 words line %d of %s", lf->lineIx, lf->fileName);
    if (words[4][0] == 'N' || words[4][0] == 'U')
        {
        /* Gap line: close the current contig. */
        contig = NULL;
        continue;
        }
    lineFileExpectWords(lf, 9, wordCount);
    agp = agpFragLoad(words);
    // file is 1-based but agpFragLoad() now assumes 0-based:
    agp->chromStart -= 1;
    agp->fragStart  -= 1;
    if (contig == NULL)
        {
        AllocVar(contig);
        formatOrAbort(contig->name, "%s%d", rootName, ++contigIx);
        contig->startOffset = agp->chromStart;
        slAddHead(&contigList, contig);
        }
    else
        {
        /* agpList is built head-first, so its head is the previous fragment. */
        if (contig->agpList != NULL && contig->agpList->chromEnd != agp->chromStart)
            errAbort("Start doesn't match previous end line %d of %s",
                lf->lineIx, lf->fileName);
        }
    if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart)
        errAbort("Chrom and frag size mismatch line %d of %s", lf->lineIx, lf->fileName);
    slAddHead(&contig->agpList, agp);
    contig->endOffset = agp->chromEnd;
    }
slReverse(&contigList);
for (contig = contigList; contig != NULL; contig = contig->next)
    slReverse(&contig->agpList);
lineFileClose(&lf);

/* Load up chromosome sequence and make sure it is in one piece. */
printf("Reading %s\n", faName);
seq = faReadAllDna(faName);
if (slCount(seq) != 1)
    errAbort("Got %d sequences in %s, can only handle one.", slCount(seq), faName);

/* Every contig is later written out as seq->dna[startOffset..endOffset), so
 * make sure the agp does not describe more bases than the fasta holds. */
for (contig = contigList; contig != NULL; contig = contig->next)
    {
    if (contig->endOffset > seq->size)
        errAbort("Contig %s ends at %d but %s has only %d bases",
            contig->name, contig->endOffset, faName, seq->size);
    }

/* Fix up agp coordinates. Make a directory for each contig.  Fill it with
 * .fa .agp barge.NN files for that contig. */
printf("Writing contig dirs\n");
for (contig = contigList; contig != NULL; contig = contig->next)
    {
    /* Make Contig dir. */
    formatOrAbort(contigDir, "%s/%s", finDir, contig->name);
    makeDir(contigDir);

    /* Make contig.agp file.  This loop also rewrites each fragment in place:
     * chrom becomes "<chrom without chr>/<contig>", coordinates become
     * contig-relative, and ix is renumbered from 1. */
    formatOrAbort(path, "%s/%s.agp", contigDir, contig->name);
    f = mustOpen(path, "w");
    fragIx = 0;
    for (agp = contig->agpList; agp != NULL; agp = agp->next)
        {
        char buf[128];
        formatOrAbort(buf, "%s/%s", skipChr(agp->chrom), contig->name);
        freez(&agp->chrom);
        agp->chrom = cloneString(buf);
        agp->chromStart -= contig->startOffset;
        agp->chromEnd -= contig->startOffset;
        agp->ix = ++fragIx;
        agpFragTabOut(agp, f);
        }
    carefulClose(&f);

    /* Make ooGreedy.NN.gl file */
    formatOrAbort(path, "%s/%s.%s.gl", contigDir, "ooGreedy", ooVer);
    f = mustOpen(path, "w");
    for (agp = contig->agpList; agp != NULL; agp = agp->next)
        {
        if (agp->type[0] != 'N' && agp->type[0] != 'U')
            {
            fprintf(f, "%s_1\t%d\t%d\t%s\n",  agp->frag,
                agp->chromStart,
                agp->chromEnd,
                agp->strand);
            }
        }
    carefulClose(&f);

    /* Make contig.fa file. */
    formatOrAbort(path, "%s/%s.fa", contigDir, contig->name);
    faWrite(path, contig->name, seq->dna + contig->startOffset,
        contig->endOffset - contig->startOffset);

    /* Make contig/barge file. */
    formatOrAbort(path, "%s/barge.%s", contigDir, ooVer);
    f = mustOpen(path, "w");
    fprintf(f, "Barge (Connected Clone) File ooGreedy Version %s\n", ooVer);
    fprintf(f, "\n");
    fprintf(f, "start  accession  size overlap maxClone maxOverlap\n");
    fprintf(f, "------------------------------------------------------------\n");
    for (agp = contig->agpList; agp != NULL; agp = agp->next)
        {
        char clone[128];
        formatOrAbort(clone, "%s", agp->frag);
        chopSuffix(clone);

        fprintf(f, "%d\t%s\t%d\t100\tn/a\t0\n", agp->chromStart,
            clone, agp->chromEnd);
        }
    carefulClose(&f);

    /* Make contig/gold file.  Each line is written from a stack copy of the
     * fragment so the "_1" name and altered type do not touch the list. */
    formatOrAbort(path, "%s/gold.%s", contigDir, ooVer);
    f = mustOpen(path, "w");
    for (agp = contig->agpList; agp != NULL; agp = agp->next)
        {
        char fragName[128];
        struct agpFrag frag = *agp;
        formatOrAbort(fragName, "%s_1", agp->frag);
        frag.frag = fragName;
        frag.type[0] = '0';
        agpFragTabOut(&frag, f);
        }
    carefulClose(&f);
    }

/* Create lift subdirectory. */
printf("Creating lift files\n");
formatOrAbort(liftDir, "%s/lift", finDir);
makeDir(liftDir);

/* Create lift/oOut.lst file (just a list of contigs). */
formatOrAbort(path, "%s/oOut.lst", liftDir);
f = mustOpen(path, "w");
for (contig = contigList; contig != NULL; contig = contig->next)
    fprintf(f, "%s/%s.fa.out\n", contig->name, contig->name);
carefulClose(&f);

/* Create lift/ordered.lst file (just a list of contigs). */
formatOrAbort(path, "%s/ordered.lst", liftDir);
f = mustOpen(path, "w");
for (contig = contigList; contig != NULL; contig = contig->next)
    fprintf(f, "%s\n", contig->name);
carefulClose(&f);

/* Create lift/ordered.lft file: offset, chrom/contig, contig size, chrom
 * name (taken from the fasta file name), chrom size. */
formatOrAbort(path, "%s/ordered.lft", liftDir);
f = mustOpen(path, "w");
splitPath(faName, NULL, chrom, NULL);
for (contig = contigList; contig != NULL; contig = contig->next)
    fprintf(f, "%d\t%s/%s\t%d\t%s\t%d\n",
        contig->startOffset, skipChr(chrom), contig->name,
        contig->endOffset - contig->startOffset,
        chrom, seq->size);
carefulClose(&f);
}
Exemplo n.º 14
0
static char *mimeTextOrNull(char *s)
/* Return s unchanged, or the literal "(null)" when s is NULL, so the result
 * can always be handed to a %s conversion. */
{
return (s != NULL) ? s : "(null)";
}

void printMimeInfo(struct mimePart *mp, FILE *out, int level)
/* Print mimeParts recursively if needed.
 *   mp    - part to describe; its hdr hash is searched for the
 *           content-disposition, content-type and content-transfer-encoding
 *           headers.
 *   out   - stream that receives the report.
 *   level - nesting depth; every line is prefixed with this many spaces.
 * Aborts if the part has neither data, a fileName, nor nested parts. */
{
char *cd = NULL, *cdMain = NULL, *cdName = NULL, *cdFileName = NULL,
 *ct = NULL, *ce = NULL;
char *margin = needMem(level+1);
struct mimePart *child = NULL;
int i = 0;
for(i=0;i<level;++i)
    margin[i] = ' ';
margin[level] = 0;


cd = hashFindVal(mp->hdr,"content-disposition");
ct = hashFindVal(mp->hdr,"content-type");
ce = hashFindVal(mp->hdr,"content-transfer-encoding");

if (cd)
    {
    fprintf(out,"%scontent-disposition: %s\n",margin,cd);
    cdMain=getMimeHeaderMainVal(cd);
    cdName=getMimeHeaderFieldVal(cd,"name");
    /* Either value may be absent; passing NULL to %s is undefined, so spell
     * out the "(null)" text explicitly. */
    fprintf(out,"%smain:[%s]\n",margin,mimeTextOrNull(cdMain));
    fprintf(out,"%sname:[%s]\n",margin,mimeTextOrNull(cdName));
    cdFileName=getMimeHeaderFieldVal(cd,"filename");
    if (cdFileName)
        fprintf(out,"%sfilename:[%s]\n",margin,cdFileName);
    }
if (ct)
    fprintf(out,"%scontent-type: %s\n",margin,ct);
if (ce)
    /* NOTE(review): the label below is misspelled ("transer"); it is kept
     * byte-for-byte because existing consumers of this output may match it. */
    fprintf(out,"%scontent-transer-encoding: %s\n",margin,ce);

if (cd)
    {
    fprintf(out,"%ssize:[%llu]\n",margin,(unsigned long long) mp->size);
    if (mp->binary)
        fprintf(out,"%sbinary (contains zeros)\n",margin);
    if (mp->fileName)
        fprintf(out,"%sfileName=[%s]\n",margin, mp->fileName);
    /* Binary payloads are never printed; a missing payload prints "(null)". */
    fprintf(out,"%sdata:[%s]\n",margin,
        mp->binary && mp->data ? "<binary data not safe to print>"
                               : mimeTextOrNull(mp->data));
    fprintf(out,"\n");
    }

if (mp->data)
    {
    /* In-memory payload: nothing more to report. */
    }
else if (mp->fileName)
    {
    /* Payload held in a file: nothing more to report. */
    }
else if (mp->multi)
    {
    fprintf(out,"%snested MIME structure\n\n",margin);
    for(child=mp->multi;child;child=child->next)
        printMimeInfo(child, out, level+1);
    }
else
    {
    errAbort("mp-> type not data,fileName, or multi - unexpected MIME structure");
    }

freez(&cdMain);
freez(&cdName);
freez(&cdFileName);
freez(&margin);

}
static void expectArgCount(int argc, int wanted)
/* Call usage() unless the command line has exactly 'wanted' words. */
{
if (argc != wanted)
    usage();
}

int main(int argc, char *argv[])
/* Process command line: argv[1] selects the step to run, argv[2] (when
 * present) is made the current directory, and any further words are passed
 * to the selected step. */
{
struct sqlConnection *dbConn = NULL;
char *action = NULL;

optionInit(&argc, argv, options);
database = optionVal("database", database);
sqlPath = optionVal("sqlPath", sqlPath);
if (argc < 2)
    usage();
action = argv[1];
if (argc >= 3)
    setCurrentDir(argv[2]);
dbConn = sqlConnect(database);

if (sameWord(action, "INIT"))
    {
    /* The init(conn) call that used to live here is deliberately disabled. */
    expectArgCount(argc, 2);
    errAbort("INIT is probably too dangerous. DO NOT USE.");
    }
else if (sameWord(action, "POP"))
    {
    /* populate vgPrb where missing */
    expectArgCount(argc, 2);
    populateMissingVgPrb(dbConn);
    }
else if (sameWord(action, "SEQ"))
    {
    /* make fake probe sequences */
    expectArgCount(argc, 4);
    makeFakeProbeSeq(dbConn, argv[3]);
    }
else if (sameWord(action, "ALI"))
    {
    /* blat anything left that is not aligned, nor even attempted */
    expectArgCount(argc, 4);
    doAlignments(dbConn, argv[3]);
    }
else if (sameWord(action, "EXT"))
    {
    /* update seq and extfile as necessary */
    expectArgCount(argc, 4);
    doSeqAndExtFile(dbConn, argv[3], "vgProbes");
    }
else if (sameWord(action, "PSLMAP"))
    {
    /* pslMap anything left that is not aligned, nor even attempted */
    expectArgCount(argc, 5);
    doAlignmentsPslMap(dbConn, argv[3], argv[4]);
    }
else if (sameWord(action, "REMAP"))
    {
    /* re-map anything in track specified that is not aligned,
     * nor even attempted yet, using specified fasta file. */
    expectArgCount(argc, 7);
    doAlignmentsReMap(dbConn, argv[3], argv[4], argv[5], argv[6]);
    }
else if (sameWord(action, "SELFMAP"))
    {
    /* self-map step; takes a single extra argument */
    expectArgCount(argc, 4);
    doAlignmentsSelfMap(dbConn, argv[3]);
    }
else if (sameWord(action, "EXTALL"))
    {
    /* update seq and extfile as necessary */
    expectArgCount(argc, 4);
    doSeqAndExtFile(dbConn, argv[3], "vgAllProbes");
    }
else
    {
    usage();
    }

sqlDisconnect(&dbConn);
return 0;
}
Exemplo n.º 16
0
void writeBedTab(char *fileName, struct bedStub *bedList)
/* Write out bed list to tab-separated file.
 *   fileName - output path, opened for writing.
 *   bedList  - items to write; each item's line is chopped in place into
 *              words (by tabs when strictTab is set, by white space otherwise).
 * Unless noBin is set every row is prefixed with a bin column.  When itemRgb
 * is set, word 8 given as r,g,b is written as a single integer.  When
 * dotIsNull is positive, a "." in that word index is written as -1.  Every
 * failed write, including a failed close, is reported via writeFailed(). */
{
struct bedStub *bed;
FILE *f = mustOpen(fileName, "w");
char *words[64];
int i, wordCount;
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    if (!noBin)
        {
        // allow for zero-length at start of seq [bin code can't handle 0-0]
        unsigned end = (bed->chromEnd > 0) ? bed->chromEnd : 1;
        if (fprintf(f, "%u\t", hFindBin(bed->chromStart, end)) <= 0)
            writeFailed(fileName);
        }
    if (strictTab)
        wordCount = chopTabs(bed->line, words);
    else
        wordCount = chopLine(bed->line, words);
    for (i=0; i<wordCount; ++i)
        {
        /*      new definition for old "reserved" field, now itemRgb */
        /*      and when itemRgb, it is a comma separated string r,g,b */
        if (itemRgb && (i == 8))
            {
            /*  Allow comma separated list of rgb values here   */
            char *comma = strchr(words[8], ',');
            if (comma)
                {
                /* Named 'rgb' so it does not shadow the itemRgb flag. */
                int rgb = bedParseRgb(words[8]);
                if (rgb == -1)
                    {
                    errAbort("ERROR: expecting r,g,b specification, "
                                "found: '%s'", words[8]);
                    }
                else
                    {
                    if (fprintf(f, "%d", rgb) <= 0)
                        writeFailed(fileName);
                    }

                verbose(2, "itemRgb: %s, rgb: %#x\n", words[8], rgb);
                }
            else
                {
                if (fputs(words[i], f) == EOF)
                    writeFailed(fileName);
                }
            }
        else if ((dotIsNull > 0) && (dotIsNull == i) && sameString(words[i],"."))
            {
            /* If the . was used to represent NULL, replace with -1 in the tables */
            if (fputs("-1", f) == EOF)
                writeFailed(fileName);
            }
        else
            {
            if (fputs(words[i], f) == EOF)
                writeFailed(fileName);
            }

        /* Tab between words, newline after the last one. */
        if (i == wordCount-1)
            {
            if (fputc('\n', f) == EOF)
                writeFailed(fileName);
            }
        else
            {
            if (fputc('\t', f) == EOF)
                writeFailed(fileName);
            }
        }
    }
/* Buffered data is flushed at close, so a full disk can first show up here. */
if (fclose(f) != 0)
    writeFailed(fileName);
}
static void processIsPcr(struct sqlConnection *conn, int taxon, char *db)
/* Process isPcr results read from the file "isPcr.fa".
 *   conn  - open database connection used for the vgPrb/vgPrbMap updates.
 *   taxon - passed to findVgPrbBySeq() when looking for an existing sequence.
 *   db    - value stored in the db column of updated vgPrb rows.
 * Each record is a header line followed by sequence lines.  Example header:
 *   >NM_010919:371+1088 2 718bp CGCGGATCCAAGGACATCTTGGACCTTCCG CCCAAGCTTGCATGTGCTGCAGCGACTGCG
 * which is parsed as tName:tStart<strand>tEnd probeId ...
 * Aborts with the offending header if that structure is not found.
 * NOTE(review): the update statement embeds tName and the sequence text
 * directly; this assumes isPcr.fa is trusted pipeline output - confirm. */
{
struct dyString *dy = dyStringNew(0);
struct lineFile *lf = lineFileOpen("isPcr.fa", TRUE);
int lineSize;
char *line;
char *name;
char *dna;
char *word, *end;
char *tName;
int tStart;
int tEnd;
char *tStrand;
int probeid=0;  /* really a vgPrb id */
boolean more = lineFileNext(lf, &line, &lineSize);
while(more)
    {
    if (line[0] != '>')
        errAbort("unexpected error out of phase\n");
    name = cloneString(line);
    verbose(1,"name=%s\n",name);

    /* Gather sequence lines up to the next header or end of file. */
    dyStringClear(dy);
    while((more=lineFileNext(lf, &line, &lineSize)))
        {
        if (line[0] == '>')
            {
            break;
            }
        dyStringAppend(dy,line);
        }
    dna = cloneString(dy->string);

    /* Target name: text between '>' and ':'. */
    word = name+1;
    end = strchr(word,':');
    if (end == NULL)
        errAbort("Missing ':' in isPcr.fa header: %s", name);
    tName = cloneStringZ(word,end-word);

    /* Start and strand: the first '+' or '-' after the colon is the strand. */
    word = end+1;
    end = strpbrk(word,"+-");
    if (end == NULL)
        errAbort("Missing strand in isPcr.fa header: %s", name);
    tStrand = (*end == '+') ? "+" : "-";
    tStart = atoi(word);

    /* End: digits up to the next space. */
    word = end+1;
    end = strchr(word,' ');
    if (end == NULL)
        errAbort("Missing probe id in isPcr.fa header: %s", name);
    tEnd = atoi(word);

    /* Probe id: the word after that space. */
    word = end+1;
    probeid = atoi(word);

    dyStringClear(dy);
    dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",probeid);
    if (sqlQuickNum(conn,dy->string)>0)
        {
        /* record exists and hasn't already been updated */

        int vgPrb = findVgPrbBySeq(conn,dna,taxon);

        if (vgPrb == 0)
            {
            dyStringClear(dy);
            dyStringAppend(dy, "update vgPrb set");
            dyStringAppend(dy, " seq='");
            dyStringAppend(dy, dna);
            dyStringAppend(dy, "',\n");
            dyStringPrintf(dy, " tName='%s',\n", tName);
            dyStringPrintf(dy, " tStart=%d,\n", tStart);
            dyStringPrintf(dy, " tEnd=%d,\n", tEnd);
            dyStringPrintf(dy, " tStrand='%s',\n", tStrand);
            dyStringPrintf(dy, " db='%s',\n", db);
            dyStringPrintf(dy, " state='%s'\n", "seq");
            dyStringPrintf(dy, " where id=%d\n", probeid);
            dyStringPrintf(dy, " and state='%s'\n", "new");
            verbose(2, "%s\n", dy->string);
            sqlUpdate(conn, dy->string);
            }
        else  /* probe seq already exists */
            {
            /* just re-map the probe table recs to it */
            dyStringClear(dy);
            dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,probeid);
            sqlUpdate(conn, dy->string);
            /* and delete it from vgPrb */
            dyStringClear(dy);
            dyStringPrintf(dy, "delete from vgPrb where id=%d",probeid);
            sqlUpdate(conn, dy->string);
            }
        }

    freez(&tName);
    freez(&name);
    freez(&dna);
    }
lineFileClose(&lf);

dyStringFree(&dy);
}
Exemplo n.º 18
0
static void loadDatabase(char *database, char *track, int bedSize, struct bedStub *bedList)
/* Load database from bedList.
 *   database - database to connect to (no connection is made when noLoad is set).
 *   track    - name of the table to create and load.
 *   bedSize  - number of bed fields; selects the columns of a generated table.
 *   bedList  - items written to the tab file by writeBedTab().
 * Steps: (1) unless oldTable is set, (re)make the table, either from the
 * sqlTable file or from a generated CREATE statement; (2) write the tab file;
 * (3) unless noLoad is set, load it, optionally add a history comment and
 * optionally rescale the score column from fillInScoreColumn. */
{
struct sqlConnection *conn = NULL;   /* stays NULL when noLoad is set */
struct dyString *dy = newDyString(1024);
char *tab = (char *)NULL;
int loadOptions = (optionExists("onServer") ? SQL_TAB_FILE_ON_SERVER : 0);

if ( ! noLoad )
    conn = sqlConnect(database);

if ((char *)NULL != tmpDir)
    tab = cloneString(rTempName(tmpDir,"loadBed",".tab"));
else
    tab = cloneString("bed.tab");

if (bedDetail && sqlTable == NULL && !customTrackLoader)
    errAbort("bedDetail format requires sqlTable option");
if (bedDetail && !strictTab)
    errAbort("bedDetail format must be tab separated");
if (bedDetail && !noBin)
    noBin = TRUE;

/* First make table definition. */
if (sqlTable != NULL && !oldTable)
    {
    /* Read from file. */
    char *sql, *s;
    readInGulp(sqlTable, &sql, NULL);
    /* Chop off end-of-statement semicolon if need be. */
    s = strchr(sql, ';');
    if (s != NULL) *s = 0;

    if ( !noLoad )
        {
        if (renameSqlTable)
            {
            char *pos = stringIn("CREATE TABLE ", sql);
            if (pos == NULL)
                errAbort("Can't find CREATE TABLE in %s\n", sqlTable);
            /* nextWord() chops sql in place, so keep an intact copy to do
             * the replacement on. */
            char *oldSql = cloneString(sql);
            nextWord(&pos); nextWord(&pos);
            char *tableName = nextWord(&pos);
            if (tableName == NULL)
                errAbort("Can't find table name after CREATE TABLE in %s\n", sqlTable);
            char *renamedSql = replaceChars(oldSql, tableName, track);
            /* tableName points into sql, so free only after the replacement. */
            freez(&oldSql);
            freez(&sql);
            sql = renamedSql;
            }
        verbose(1, "Creating table definition for %s from sql: %s\n", track, sqlTable);
        // add NOSQLINJ tag
        sqlDyStringPrintf(dy, "%-s", sql);
        sqlRemakeTable(conn, track, dy->string);
        if (!noBin)
            addBinToEmptyTable(conn, track);
        adjustSqlTableColumns(conn, track, bedSize);
        }

    freez(&sql);
    }
else if (!oldTable)
    {
    int minLength;

    if (noLoad)
        minLength=6;
    else if (maxChromNameLength)
        minLength = maxChromNameLength;
    else
        minLength = hGetMinIndexLength(database);
    verbose(2, "INDEX chrom length: %d\n", minLength);

    /* Create definition statement. */
    verbose(1, "Creating table definition for %s, bedSize: %d\n", track, bedSize);
    sqlDyStringPrintf(dy, "CREATE TABLE %s (\n", track);
    if (!noBin)
       dyStringAppend(dy, "  bin smallint unsigned not null,\n");
    dyStringAppend(dy, "  chrom varchar(255) not null,\n");
    dyStringAppend(dy, "  chromStart int unsigned not null,\n");
    dyStringAppend(dy, "  chromEnd int unsigned not null,\n");
    if (bedSize >= 4)
       maybeBedGraph(4, dy, "  name varchar(255) not null,\n");
    if (bedSize >= 5)
        {
        if (allowNegativeScores)
            maybeBedGraph(5, dy, "  score int not null,\n");
        else
            maybeBedGraph(5, dy, "  score int unsigned not null,\n");
        }
    if (bedSize >= 6)
       maybeBedGraph(6, dy, "  strand char(1) not null,\n");
    if (bedSize >= 7)
       maybeBedGraph(7, dy, "  thickStart int unsigned not null,\n");
    if (bedSize >= 8)
       maybeBedGraph(8, dy, "  thickEnd int unsigned not null,\n");
    /*  As of 2004-11-22 the reserved field is used as itemRgb in code */
    if (bedSize >= 9)
       maybeBedGraph(9, dy, "  reserved int unsigned  not null,\n");
    if (bedSize >= 10)
       maybeBedGraph(10, dy, "  blockCount int unsigned not null,\n");
    if (bedSize >= 11)
       maybeBedGraph(11, dy, "  blockSizes longblob not null,\n");
    if (bedSize >= 12)
       maybeBedGraph(12, dy, "  chromStarts longblob not null,\n");
    if (bedSize >= 13)
       maybeBedGraph(13, dy, "  expCount int unsigned not null,\n");
    if (bedSize >= 14)
       maybeBedGraph(14, dy, "  expIds longblob not null,\n");
    if (bedSize >= 15)
       maybeBedGraph(15, dy, "  expScores longblob not null,\n");
    dyStringAppend(dy, "#Indices\n");
    if (nameIx && (bedSize >= 4) && (0 == bedGraph))
       dyStringAppend(dy, "  INDEX(name(16)),\n");
    if (noBin)
        {
        dyStringPrintf(dy, "  INDEX(chrom(%d),chromStart)\n", minLength);
        }
    else
        {
        dyStringPrintf(dy, "  INDEX(chrom(%d),bin)\n", minLength);
        }
    dyStringAppend(dy, ")\n");
    if (noLoad)
        verbose(2,"%s", dy->string);
    else
        sqlRemakeTable(conn, track, dy->string);
    }

verbose(1, "Saving %s\n", tab);
writeBedTab(tab, bedList);

if ( ! noLoad )
    {
    verbose(1, "Loading %s\n", database);
    if (customTrackLoader)
        sqlLoadTabFile(conn, tab, track, loadOptions|SQL_TAB_FILE_WARN_ON_WARN);
    else
        sqlLoadTabFile(conn, tab, track, loadOptions);

    if (! noHistory)
        hgHistoryComment(conn, "Add %d element(s) from bed list to %s table",
                         slCount(bedList), track);
    if(fillInScoreColumn != NULL)
        {
        char query[500];
        char buf[500];
        struct sqlResult *sr;
        /* Scores are only filled in when every loaded score is zero. */
        sqlSafef(query, sizeof(query), "select sum(score) from %s", track);
        if(sqlQuickQuery(conn, query, buf, sizeof(buf)))
            {
            unsigned sum = sqlUnsigned(buf);
            if (!sum)
                {
                sqlSafef(query, sizeof(query), "select min(%s), max(%s) from %s", fillInScoreColumn, fillInScoreColumn, track);
                if ((sr = sqlGetResult(conn, query)) != NULL)
                    {
                    char **row = sqlNextRow(sr);
                    if(row != NULL)
                        {
                        float min = sqlFloat(row[0]);
                        float max = sqlFloat(row[1]);
                        if ( !(max == -1 && min == -1)) // if score is -1 then ignore, as if it werent present
                            {
                            if (max == min || sameString(row[0],row[1])) // this will lead to 'inf' score value in SQL update causing an error
                                errAbort("Could not set score in table %s max(%s)=min(%s)=%s\n", track, fillInScoreColumn, fillInScoreColumn, row[0]);
                            /* Result is released before the update below is
                             * issued on the same connection. */
                            sqlFreeResult(&sr);

                            // Calculate a, b s/t f(x) = ax + b maps min-max => minScore-1000
                            float a = (1000-minScore) / (max - min);
                            float b = 1000 - ((1000-minScore) * max) / (max - min);

                            sqlSafef(query, sizeof(query), "update %s set score = round((%f * %s) + %f)",  track, a, fillInScoreColumn, b);
                            int changed = sqlUpdateRows(conn, query, NULL);
                            verbose(2, "update query: %s; changed: %d\n", query, changed);
                            }
                        else
                            {
                            sqlFreeResult(&sr);
                            verbose(2, "score not updated; all values for column %s are -1\n", fillInScoreColumn);
                            }
                        }
                    else
                        {
                        /* No row came back: still release the result. */
                        sqlFreeResult(&sr);
                        }
                    }
                }
            }

        }
    sqlDisconnect(&conn);
    /*  if temp dir specified, unlink file to make it disappear */
    if ((char *)NULL != tmpDir)
        unlink(tab);
    }
else
    verbose(1, "No load option selected, see file: %s\n", tab);

freez(&tab);
}       /*      static void loadDatabase()      */
Exemplo n.º 19
0
static void asdDoQueryChunking(struct annoStreamDb *self, char *minChrom, uint minEnd)
/* Build and run the query for the next chunk of rows and buffer the result in
 * self via bufferRowsFromSqlQuery(); nothing is returned.
 *   self     - streamer state; eof, mergeBins, qLm and queryChrom may be updated.
 *   minChrom - if not NULL, chromosomes sorting before it are skipped in a
 *              genome-wide query; in a region query it must equal the region's
 *              chromosome.
 *   minEnd   - on minChrom, only items whose end is greater than this are fetched.
 * At most ASD_CHUNK_SIZE rows are requested (fewer when maxOutRows is nearly
 * used up).  A region query (sSelf->chrom set) is restricted to the region; a
 * genome-wide query is issued one chromosome at a time and recurses to the
 * next chromosome when the current one yields no rows.  Sets self->eof when
 * there is nothing further to fetch. */
{
struct annoStreamer *sSelf = &(self->streamer);
boolean hasWhere = FALSE;
struct dyString *query = self->makeBaselineQuery(self, &hasWhere);
if (sSelf->chrom != NULL && self->rowBuf.size > 0 && !self->doNextChunk)
    {
    // We're doing a region query, we already got some rows, and don't need another chunk:
    resetRowBuf(&self->rowBuf);
    self->eof = TRUE;
    }
if (self->useMaxOutRows)
    {
    self->maxOutRows -= self->rowBuf.size;
    if (self->maxOutRows <= 0)
        self->eof = TRUE;
    }
if (self->eof)
    {
    // The baseline query was already allocated above; don't leak it.
    dyStringFree(&query);
    return;
    }
int queryMaxItems = ASD_CHUNK_SIZE;
if (self->useMaxOutRows && self->maxOutRows < queryMaxItems)
    queryMaxItems = self->maxOutRows;
if (self->hasBin)
    {
    // Results will be in bin order, but we can restore chromStart order by
    // accumulating initial coarse-bin items and merge-sorting them with
    // subsequent finest-bin items which will be in chromStart order.
    if (self->doNextChunk && self->mergeBins && !self->gotFinestBin)
        errAbort("annoStreamDb %s: can't continue merge in chunking query; "
                 "increase ASD_CHUNK_SIZE", sSelf->name);
    self->mergeBins = TRUE;
    if (self->qLm == NULL)
        self->qLm = lmInit(0);
    }
if (self->endFieldIndexName != NULL)
    // Don't let mysql use a (chrom, chromEnd) index because that messes up
    // sorting by chromStart.
    sqlDyStringPrintf(query, " IGNORE INDEX (%s) ", self->endFieldIndexName);
if (sSelf->chrom != NULL)
    {
    // Region query: start at the latest of regionStart, minEnd and nextChunkStart.
    uint start = sSelf->regionStart;
    if (minChrom)
        {
        if (differentString(minChrom, sSelf->chrom))
            errAbort("annoStreamDb %s: nextRow minChrom='%s' but region chrom='%s'",
                     sSelf->name, minChrom, sSelf->chrom);
        if (start < minEnd)
            start = minEnd;
        }
    if (self->doNextChunk && start < self->nextChunkStart)
        start = self->nextChunkStart;
    sqlDyStringAppend(query, hasWhere ? " and " : " where ");
    sqlDyStringPrintf(query, "%s = '%s' and ", self->chromField, sSelf->chrom);
    if (self->hasBin)
        {
        if (self->doNextChunk && self->gotFinestBin)
            // It would be way more elegant to make a hAddBinTopLevelOnly but this will do:
            dyStringPrintf(query, "bin > %d and ", self->minFinestBin);
        hAddBinToQuery(start, sSelf->regionEnd, query);
        }
    if (self->doNextChunk)
        sqlDyStringPrintf(query, "%s >= %u and ", self->startField, self->nextChunkStart);
    sqlDyStringPrintf(query, "%s < %u and %s > %u ", self->startField, sSelf->regionEnd,
                      self->endField, start);
    if (self->notSorted)
        sqlDyStringPrintf(query, "order by %s ", self->startField);
    sqlDyStringPrintf(query, "limit %d", queryMaxItems);
    bufferRowsFromSqlQuery(self, query->string, queryMaxItems);
    if (self->rowBuf.size == 0)
        self->eof = TRUE;
    }
else
    {
    // Genome-wide query: break it into chrom-by-chrom queries.
    if (self->queryChrom == NULL)
        self->queryChrom = self->chromList;
    else if (!self->doNextChunk)
        {
        self->queryChrom = self->queryChrom->next;
        resetMergeState(self);
        }
    if (minChrom != NULL)
        {
        // Skip chroms that precede minChrom
        while (self->queryChrom != NULL && strcmp(self->queryChrom->name, minChrom) < 0)
            {
            self->queryChrom = self->queryChrom->next;
            self->doNextChunk = FALSE;
            resetMergeState(self);
            }
        if (self->hasBin)
            {
            self->mergeBins = TRUE;
            if (self->qLm == NULL)
                self->qLm = lmInit(0);
            }
        }
    if (self->queryChrom == NULL)
        self->eof = TRUE;
    else
        {
        char *chrom = self->queryChrom->name;
        // Unsigned like minEnd and nextChunkStart, and printed with %u below.
        uint start = 0;
        if (minChrom != NULL && sameString(chrom, minChrom))
            start = minEnd;
        if (self->doNextChunk && start < self->nextChunkStart)
            start = self->nextChunkStart;
        uint end = annoAssemblySeqSize(self->streamer.assembly, self->queryChrom->name);
        sqlDyStringAppend(query, hasWhere ? " and " : " where ");
        sqlDyStringPrintf(query, "%s = '%s' ", self->chromField, chrom);
        if (start > 0 || self->doNextChunk)
            {
            dyStringAppend(query, "and ");
            if (self->hasBin)
                {
                if (self->doNextChunk && self->gotFinestBin)
                    // It would be way more elegant to make a hAddBinTopLevelOnly but this will do:
                    dyStringPrintf(query, "bin > %d and ", self->minFinestBin);
                hAddBinToQuery(start, end, query);
                }
            if (self->doNextChunk)
                sqlDyStringPrintf(query, "%s >= %u and ", self->startField, self->nextChunkStart);
            // region end is chromSize, so no need to constrain startField here:
            sqlDyStringPrintf(query, "%s > %u ", self->endField, start);
            }
        if (self->notSorted)
            sqlDyStringPrintf(query, "order by %s ", self->startField);
        dyStringPrintf(query, "limit %d", queryMaxItems);
        bufferRowsFromSqlQuery(self, query->string, queryMaxItems);
        // If there happens to be no items on chrom, try again with the next chrom:
        if (! self->eof && self->rowBuf.size == 0)
            asdDoQueryChunking(self, minChrom, minEnd);
        }
    }
dyStringFree(&query);
}
Exemplo n.º 20
0
int main(int argc, char *argv[])
/* Process command line: copy every option into its file-level setting, apply
 * the customTrackLoader overrides, set up the chromosome hash when a type was
 * given, then hand database, track and the input files to hgLoadBed(). */
{
char *chromInfoPath;

optionInit(&argc, argv, optionSpecs);
if (argc < 4)
    usage();

/* Table layout and input parsing switches. */
noBin = optionExists("noBin") || optionExists("nobin");
noSort = optionExists("noSort");
strictTab = optionExists("tab");
oldTable = optionExists("oldTable");
sqlTable = optionVal("sqlTable", sqlTable);
renameSqlTable = optionExists("renameSqlTable");
trimSqlTable = optionExists("trimSqlTable");
as = optionVal("as", as);
type = optionVal("type", type);
hasBin = optionExists("hasBin");
noLoad = optionExists("noLoad");
noHistory = optionExists("noHistory");
bedGraph = optionInt("bedGraph",0);
bedDetail = optionExists("bedDetail");

/* Score floor must stay inside the 0..1000 score range. */
minScore = optionInt("minScore",100);
if (!(minScore >= 0 && minScore <= 1000))
    errAbort("minScore must be between 0-1000\n");

notItemRgb = optionExists("notItemRgb");
if (notItemRgb)
    itemRgb = FALSE;
maxChromNameLength = optionInt("maxChromNameLength",0);
dotIsNull = optionInt("dotIsNull",dotIsNull);
noStrict = optionExists("noStrict") || optionExists("nostrict");
allowStartEqualEnd = optionExists("allowStartEqualEnd");
tmpDir = optionVal("tmpDir", tmpDir);
nameIx = ! optionExists("noNameIx");
ignoreEmpty = optionExists("ignoreEmpty");
allowNegativeScores = optionExists("allowNegativeScores");
customTrackLoader = optionExists("customTrackLoader");
parseType();

/* customTrackLoader turns on: noNameIx, ignoreEmpty, allowStartEqualEnd,
 * allowNegativeScores, -verbose=0, and arms a 20 minute alarm. */
if (customTrackLoader)
    {
    type = NULL;   /* because customTrack/Factory has already validated the input */
    ignoreEmpty = TRUE;
    noHistory = TRUE;
    nameIx = FALSE;
    allowStartEqualEnd = TRUE;
    allowNegativeScores = TRUE;
    verboseSetLevel(0);
    expireSeconds = 1200;       /* 20 minutes */
    (void) signal(SIGALRM, selfApoptosis);
    (void) alarm(expireSeconds);        /* CGI timeout */
    }
fillInScoreColumn = optionVal("fillInScore", NULL);

/* Chromosome sizes come from the chromInfo file when one is named,
 * otherwise from the database, and only when a type is in effect. */
chromInfoPath = optionVal("chromInfo", NULL);
if (chromInfoPath == NULL)
    {
    if (type)
        chrHash = chromHashFromDatabase(argv[1]);
    }
else
    {
    if (!type)
        errAbort("Only use chromInfo with type for validate");
    chrHash = chromHashFromFile(chromInfoPath);
    }

hgLoadBed(argv[1], argv[2], argc-3, argv+3);
return 0;
}
Exemplo n.º 21
0
struct tagStorm *idfToStormTop(char *fileName)
/* Read an idf.txt format file and return a new tagStorm holding one top-level
 * stanza.  Each tab-separated line with at least two fields becomes a tag
 * whose name is the first field run through aeFieldToNormalField() with an
 * "idf." prefix.  Additional-file and secondary-accession lines are gathered
 * across the whole file and added as one tag each at the end. */
{
/* Stanza that receives every tag. */
struct tagStorm *storm = tagStormNew(fileName);
struct tagStanza *top = tagStanzaNew(storm, NULL);

/* File_Data1, File_Data2, ... are collapsed into one comma separated list. */
char *extraFilePrefix = "idf.Comment_AdditionalFile_Data";
struct dyString *extraFiles = dyStringNew(0);

/* Secondary accessions may repeat, so they are collected the same way. */
char *secondaryTag = "idf.Comment_SecondaryAccession";
struct dyString *secondaryAccs = dyStringNew(0);

struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct dyString *joined = dyStringNew(0);   /* scratch for multi-value tags */
char *line;

while (lineFileNextReal(lf, &line))
    {
    char *fields[256];
    char tag[256];
    int fieldCount;

    /* Drop trailing white space, then split on tabs. */
    eraseTrailingSpaces(line);
    fieldCount = chopTabs(line, fields);
    if (fieldCount == ArraySize(fields))
        errAbort("Line %d of %s has too many fields", lf->lineIx, lf->fileName);

    /* A tag with no value contributes nothing. */
    if (fieldCount < 2)
        continue;

    aeFieldToNormalField("idf.", trimSpaces(fields[0]), tag, sizeof(tag));

    if (sameString(tag, "idf.Publication_Author_List"))
        {
        /* Already a comma separated list: store the first value as is. */
        tagStanzaAppend(storm, top, tag, fields[1]);
        continue;
        }
    if (startsWith(extraFilePrefix, tag))
        {
        csvEscapeAndAppend(extraFiles, fields[1]);
        continue;
        }
    if (sameString(secondaryTag, tag))
        {
        csvEscapeAndAppend(secondaryAccs, fields[1]);
        continue;
        }

    /* General case: all values joined into one CSV-escaped string. */
    dyStringClear(joined);
    int ix = 1;
    while (ix < fieldCount)
        {
        csvEscapeAndAppend(joined, fields[ix]);
        ++ix;
        }
    tagStanzaAppend(storm, top, tag, joined->string);
    }

/* Emit the two accumulated lists, if anything was collected. */
if (extraFiles->stringSize != 0)
    tagStanzaAppend(storm, top, extraFilePrefix, extraFiles->string);
if (secondaryAccs->stringSize != 0)
    tagStanzaAppend(storm, top, secondaryTag, secondaryAccs->string);

dyStringFree(&secondaryAccs);
dyStringFree(&extraFiles);
dyStringFree(&joined);
lineFileClose(&lf);
return storm;
}
Exemplo n.º 22
0
void badFormat(struct lineFile *lf)
/* Abort with a message naming the current line number and file of lf. */
{
int lineNumber = lf->lineIx;
char *sourceName = lf->fileName;
errAbort("Bad format line %d of %s", lineNumber, sourceName);
}
struct nameOff *scanIntronFile(char *preIntronQ, char *startIntronQ, 
    char *endIntronQ, char *postIntronQ, boolean invert)
/* Scan the introns.txt file in the worm cDNA directory and return a list
 * of the introns that match the four query patterns:
 *    preIntronQ   - matched against the tail of the pre-intron field (word 6)
 *    startIntronQ - matched against the head of the intron-start field (word 7)
 *    endIntronQ   - matched against the tail of the intron-end field (word 8)
 *    postIntronQ  - matched against the head of the post-intron field (word 9)
 * An empty query string matches everything.  If invert is TRUE the introns
 * that do NOT match are returned instead.  Every returned intron is also
 * passed to addIntronToHistogram.  The list is sorted with cmpCounts.
 * Aborts on queries that are too long and on malformed lines. */
{
char intronFileName[600];
FILE *f;
char lineBuf[4*1024];
char *words[4*128];
int wordCount;
int lineCount = 0;
int preLenQ = strlen(preIntronQ);
int startLenQ = strlen(startIntronQ);
int endLenQ = strlen(endIntronQ);
int postLenQ = strlen(postIntronQ);
char *preIntronF, *startIntronF, *endIntronF, *postIntronF;
int preLenF, startLenF, endLenF, postLenF;
int preIx = 6, startIx = 7, endIx =8, postIx = 9;
struct nameOff *list = NULL, *el;
boolean addIt;
int i;
int pathLen;

if (preLenQ > 25 || postLenQ > 25 || startLenQ > 40 || endLenQ > 40)
    {
    errAbort("Can only handle queries up to 25 bases on either side of the intron "
             "and 40 bases inside the intron.");
    }

/* Build the file name with a bounded write so an overly long directory
 * cannot overflow intronFileName. */
pathLen = snprintf(intronFileName, sizeof(intronFileName), "%s%s", 
    wormCdnaDir(), "introns.txt");
if (pathLen < 0 || pathLen >= (int)sizeof(intronFileName))
    errAbort("Path to introns.txt is too long");

f = mustOpen(intronFileName, "r");
while (fgets(lineBuf, sizeof(lineBuf), f) != NULL)
    {
    ++lineCount;
    wordCount = chopByWhite(lineBuf, words, ArraySize(words));
    if (wordCount == ArraySize(words))
        {
        warn("May have truncated end of line %d of %s",
            lineCount, intronFileName);
        }
    if (wordCount == 0)
        continue;
    if (wordCount < 11)
        errAbort("Unexpected short line %d of %s", lineCount, intronFileName);
    preIntronF = words[preIx];
    startIntronF = words[startIx];
    endIntronF = words[endIx];
    postIntronF = words[postIx];
    preLenF = strlen(preIntronF);
    startLenF = strlen(startIntronF);
    endLenF = strlen(endIntronF);
    postLenF = strlen(postIntronF);

    /* The pre and end queries are aligned to the END of their fields, the
     * start and post queries to the BEGINNING of theirs.
     * NOTE(review): if a file field is shorter than the query the computed
     * start pointer lands before the field; whether countSpecial()/patMatch()
     * tolerate that cannot be seen from here - confirm. */
    addIt = FALSE;
    if (   (  preLenQ == 0 || patMatch(preIntronQ, preIntronF+preLenF-preLenQ+countSpecial(preIntronQ), preLenQ))
        && (startLenQ == 0 || patMatch(startIntronQ, startIntronF, startLenQ))
        && (  endLenQ == 0 || patMatch(endIntronQ, endIntronF+endLenF-endLenQ+countSpecial(endIntronQ), endLenQ))
        && ( postLenQ == 0 || patMatch(postIntronQ, postIntronF, postLenQ)) )
        {
        addIt = TRUE;
        }
    if (invert)
        addIt = !addIt;
    if (addIt)
        {
        /* Two bases are copied from each intron boundary below, so make
         * sure both fields really have that many. */
        if (startLenF < 2 || endLenF < 2)
            errAbort("Intron boundary shorter than 2 bases line %d of %s",
                lineCount, intronFileName);
        addIntronToHistogram(preIntronF+preLenF, startIntronF, endIntronF+endLenF, postIntronF);
        AllocVar(el);
        el->chrom = cloneString(words[1]);
        el->name = cloneString(words[5]);
        el->start = atoi(words[2]);
        el->end = atoi(words[3]);        
        el->cdnaCount = atoi(words[0]);
        memcpy(el->startI, startIntronF, 2);
        memcpy(el->endI, endIntronF + endLenF - 2, 2);
        /* The cDNA names follow the ten fixed fields.  This is a check on
         * the input file, so report it even in builds with NDEBUG. */
        if (wordCount != el->cdnaCount + 10)
            errAbort("cDNA count doesn't match number of cDNA names line %d of %s",
                lineCount, intronFileName);
        for (i=10; i<wordCount; ++i)
            {
            struct slName *name = newSlName(words[i]);
            slAddHead(&el->cdnaNames, name);
            }
        slReverse(&el->cdnaNames);
        assert(slCount(el->cdnaNames) == el->cdnaCount);
        slAddHead(&list, el);
        }
    }
fclose(f);
slSort(&list, cmpCounts);
return list;
}
Exemplo n.º 24
0
struct hash *makeProbeBed(char *inGff, char *outBed)
/* Convert probe location GFF file to BED. */
{
struct lineFile *lf = lineFileOpen(inGff, TRUE);
char *row[9];
struct hash *hash = newHash(16);
FILE *f = mustOpen(outBed, "w");
while (lineFileNextRowTab(lf, row, ArraySize(row)))
    {
    int chromIx = romanToArabicChrom(row[0], lf);
    int start = lineFileNeedNum(lf, row, 3) - 1;
    int end = lineFileNeedNum(lf, row, 4);
    char *s = row[8];
    char *probe, *orf, *note; 
    char *boundAt = "Bound at ";
    struct tfBinding *tfbList = NULL, *tfb;
    if (!startsWith("Probe ", s))
        errAbort("Expecting 9th column to start with 'Probe ' line %d of %s",
		lf->lineIx, lf->fileName);
    probe = nextWord(&s);
    orf = nextWord(&s);
    chopOff(orf, ';');
    note = nextWord(&s);
    if (!sameWord("Note", note))
        errAbort("Expecting 'note' in 9th column line %d of %s", 
		lf->lineIx, lf->fileName);
    s = skipLeadingSpaces(s);
    if (!parseQuotedString(s, s, NULL))
        errAbort("Expecting quoted string in 9th column line %d of %s",
		lf->lineIx, lf->fileName);
    if (startsWith("Bad Probe", s))
        continue;
    else if (startsWith("Not bound", s))
        {
	/* Ok, we do nothing. */
	}
    else if (startsWith(boundAt, s))
	{
	while (s != NULL && startsWith(boundAt, s))
	    {
	    char *word, *by;
	    double binding;
	    s += strlen(boundAt);
	    word = nextWord(&s);
	    binding = atof(word);
	    by = nextWord(&s);
	    if (!sameString("by:", by))
	        errAbort("Expecting by: line %d of %s", lf->lineIx, lf->fileName);
	    while ((word = nextWord(&s)) != NULL)
		{
		char lastChar = 0, *e;
		e = word + strlen(word) - 1;
		lastChar = *e;
		if (lastChar == ';' || lastChar == ',')
		     *e = 0;
		AllocVar(tfb);
		tfb->binding = binding;
		tfb->tf = cloneString(word);
		slAddHead(&tfbList, tfb);
		if (lastChar == ';')
		     break;
		}
	    s = skipLeadingSpaces(s);
	    }
	slReverse(&tfbList);
	}
    else
        {
	errAbort("Expecting %s in note line %d of %s", boundAt, 
		lf->lineIx, lf->fileName);
	}
    fprintf(f, "chr%d\t%d\t%d\t", chromIx+1, start, end);
    fprintf(f, "%s\t%d\t", orf, slCount(tfbList));
    for (tfb = tfbList; tfb != NULL; tfb = tfb->next)
	fprintf(f, "%s,", tfb->tf);
    fprintf(f, "\t");
    for (tfb = tfbList; tfb != NULL; tfb = tfb->next)
        fprintf(f, "%4.3f,", tfb->binding);
    fprintf(f, "\n");
    hashAdd(hash, orf, NULL);
    }
lineFileClose(&lf);
carefulClose(&f);
return hash;
}
Exemplo n.º 25
0
void dbTrash(char *db)
/* dbTrash - drop tables from a database older than specified N hours.
 *
 * Steps:
 *  1. Read the CT_META_INFO table twice to build a hash of expired table
 *     names (lastUse older than ageHours) and a hash of unexpired ones.
 *  2. List the tables that actually exist, either with 'show table status'
 *     (when tableStatus is set; also gives sizes) or with plain
 *     'show tables'.  Existing tables found in the expired hash are queued
 *     for dropping and removed from that hash.  Tables in neither hash are
 *     'lost' tables; they are queued only if older than dropTime (and, in
 *     the 'show tables' mode, only when delLostTable is set).
 *  3. Whatever is left in the expired hash exists in CT_META_INFO only.
 *  4. If drop is set, drop the queued tables and record a history comment,
 *     otherwise just report what would have been dropped.
 *
 * STATUS_INIT and SCAN_STATUS are macros defined elsewhere in this file.
 * NOTE(review): from the way they are used here, STATUS_INIT presumably runs
 * 'query' into 'sr' and fills in the *Ix column indexes, and SCAN_STATUS
 * presumably sets timeIxUsed and a local 'timep' for the current row -
 * confirm against their definitions.  The local variable names they may rely
 * on are therefore kept unchanged. */
{
char query[256];
struct sqlResult *sr;
char **row;
int updateTimeIx;
int createTimeIx;
int dataLengthIx;
int indexLengthIx;
int nameIx;
int timeIxUsed;
unsigned long long totalSize = 0;
// expiredTableNames: table exists and is in metaInfo and subject to age limits
struct slName *expiredTableNames = NULL;
struct slName *lostTables = NULL;       // tables existing but not in metaInfo
unsigned long long lostTableCount = 0;
struct hash *expiredHash = newHash(10); // as determined by metaInfo
struct hash *notExpiredHash = newHash(10);
struct sqlConnection *conn = sqlConnect(db);

if (extFileCheck)
    checkExtFile(conn);

time_t ageSeconds = (time_t)(ageHours * 3600);  /*      age in seconds  */
/* time_t is not necessarily 'long', so cast explicitly to match %ld. */
sqlSafef(query,sizeof(query),"select name,UNIX_TIMESTAMP(lastUse) from %s WHERE "
    "lastUse < DATE_SUB(NOW(), INTERVAL %ld SECOND);", CT_META_INFO,(long)ageSeconds);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    hashAddInt(expiredHash, row[0], sqlSigned(row[1]));
sqlFreeResult(&sr);
sqlSafef(query,sizeof(query),"select name,UNIX_TIMESTAMP(lastUse) from %s WHERE "
    "lastUse >= DATE_SUB(NOW(), INTERVAL %ld SECOND);",CT_META_INFO,(long)ageSeconds);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    hashAddInt(notExpiredHash, row[0], sqlSigned(row[1]));
sqlFreeResult(&sr);

if (tableStatus)  // show table status is very expensive, use only when asked
    {
    /*  run through the table status business to get table size information */
    sqlSafef(query,sizeof(query),"show table status");
    STATUS_INIT;
    while ((row = sqlNextRow(sr)) != NULL)
        {
        /* if not doing history too, and this is the history table, next row */
        if ((!historyToo) && (sameWord(row[nameIx],"history")))
            continue;
        /* also skip the metaInfo table */
        if ((!historyToo) && (sameWord(row[nameIx],CT_META_INFO)))
            continue;
        /* don't delete the extFile table  */
        if (sameWord(row[nameIx],CT_EXTFILE))
            continue;

        SCAN_STATUS;

        if (hashLookup(expiredHash,row[nameIx]))
            {
            slNameAddHead(&expiredTableNames, row[nameIx]);
            verbose(3,"%s %ld drop %s\n",row[timeIxUsed], (long)timep,
                    row[nameIx]);
            /*   If sizes are non-NULL, add them up     */
            if ( ((char *)NULL != row[dataLengthIx]) &&
                    ((char *)NULL != row[indexLengthIx]) )
                totalSize += sqlLongLong(row[dataLengthIx])
                    + sqlLongLong(row[indexLengthIx]);
            /* Anything still in expiredHash afterwards is a metaInfo
             * entry without an existing table. */
            hashRemove(expiredHash, row[nameIx]);
            }
        else
            {
            if (hashLookup(notExpiredHash,row[nameIx]))
                verbose(3,"%s %ld OK %s\n",row[timeIxUsed], (long)timep,
                    row[nameIx]);
            else
                {       /* table exists, but not in metaInfo, is it old enough ? */
                if (timep < dropTime)
                    {
                    slNameAddHead(&expiredTableNames, row[nameIx]);
                    verbose(2,"%s %ld dropt %s lost table\n",
                        row[timeIxUsed], (long)timep, row[nameIx]);
                    /*       If sizes are non-NULL, add them up     */
                    if ( ((char *)NULL != row[dataLengthIx]) &&
                        ((char *)NULL != row[indexLengthIx]) )
                            totalSize += sqlLongLong(row[dataLengthIx])
                                + sqlLongLong(row[indexLengthIx]);
                    }
                else
                    verbose(3,"%s %ld OKt %s\n",row[timeIxUsed],
                        (long)timep, row[nameIx]);
                }
            }
        }
    sqlFreeResult(&sr);
    }
else
    {   // simple 'show tables' is more efficient than 'show table status'
    sqlSafef(query,sizeof(query),"show tables");
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        {
        if (hashLookup(expiredHash,row[0]))
            {
            slNameAddHead(&expiredTableNames, row[0]);
            time_t lastUse = (time_t)hashIntVal(expiredHash,row[0]);
            struct tm *lastUseTm = localtime(&lastUse);
            verbose(3,"%4d-%02d-%02d %02d:%02d:%02d %ld drop %s\n",
                lastUseTm->tm_year+1900, lastUseTm->tm_mon+1,
                lastUseTm->tm_mday, lastUseTm->tm_hour, lastUseTm->tm_min,
                lastUseTm->tm_sec, (long)lastUse,row[0]);
            hashRemove(expiredHash, row[0]);
            }
        else if (hashLookup(notExpiredHash,row[0]))
            {
            time_t lastUse = (time_t)hashIntVal(notExpiredHash,row[0]);
            struct tm *lastUseTm = localtime(&lastUse);
            verbose(3,"%4d-%02d-%02d %02d:%02d:%02d %ld OK %s\n",
                lastUseTm->tm_year+1900, lastUseTm->tm_mon+1,
                lastUseTm->tm_mday, lastUseTm->tm_hour, lastUseTm->tm_min,
                lastUseTm->tm_sec, (long)lastUse,row[0]);
            }
        else
            {
            struct slName *el = slNameNew(row[0]);
            slAddHead(&lostTables, el);
            }
        }
    sqlFreeResult(&sr);
    lostTableCount = slCount(lostTables);
    // If tables exist, but not in metaInfo, check their age to expire them.
    // It turns out even this show table status is slow too, so, only
    // run thru it if asked to eliminate lost tables.  It is better to
    // do this operation with the stand-alone perl script on the customTrash
    // database machine.
    if (delLostTable && lostTables)
        {
        struct slName *el;
        for (el = lostTables; el != NULL; el = el->next)
            {
            if (sameWord(el->name,"history"))
                continue;
            if (sameWord(el->name,CT_META_INFO))
                continue;
            if (sameWord(el->name,CT_EXTFILE))
                continue;
            boolean oneTableOnly = FALSE; // protect against multiple tables
            /*  get table time information to see if it is expired */
            sqlSafef(query,sizeof(query),"show table status like '%s'", el->name);
            STATUS_INIT;

            while ((row = sqlNextRow(sr)) != NULL)
                {
                if (oneTableOnly)
                    errAbort("ERROR: query: '%s' returned more than one table "
                                "name\n", query);
                else
                    oneTableOnly = TRUE;
                if (differentWord(row[nameIx], el->name))
                    errAbort("ERROR: query: '%s' did not return table name '%s' != '%s'\n", query, el->name, row[nameIx]);

                SCAN_STATUS;

                if (timep < dropTime)
                    {
                    slNameAddHead(&expiredTableNames, row[nameIx]);
                    verbose(2,"%s %ld dropt %s lost table\n",
                        row[timeIxUsed], (long)timep, row[nameIx]);
                    }
                else
                    verbose(3,"%s %ld OKt %s\n",
                        row[timeIxUsed], (long)timep, row[nameIx]);
                }
            sqlFreeResult(&sr);
            }
        }
    }

/*      perhaps the table was already dropped, but not from the metaInfo */
struct hashEl *elList = hashElListHash(expiredHash);
struct hashEl *el;
for (el = elList; el != NULL; el = el->next)
    {
    verbose(2,"%s exists in %s only\n", el->name, CT_META_INFO);
    if (drop)
        ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */
    }

if (drop)
    {
    char comment[256];
    if (expiredTableNames)
        {
        struct slName *expired;  /* named so it does not shadow the hashEl 'el' above */
        int droppedCount = 0;
        /* customTrash DB user permissions do not have permissions to
         * drop tables.  Must use standard special user that has all
         * permissions.  If we are not using the standard user at this
         * point, then switch to it.
         */
        if (sameWord(db,CUSTOM_TRASH))
            {
            sqlDisconnect(&conn);
            conn = sqlConnect(db);
            }
        for (expired = expiredTableNames; expired != NULL; expired = expired->next)
            {
            verbose(2,"# drop %s\n", expired->name);
            sqlDropTable(conn, expired->name);
            ctTouchLastUse(conn, expired->name, FALSE); /* removes metaInfo row */
            ++droppedCount;
            }
        /* add a comment to the history table and finish up connection */
        if (tableStatus)
            safef(comment, sizeof(comment), "Dropped %d tables with "
                "total size %llu, %llu lost tables",
                    droppedCount, totalSize, lostTableCount);
        else
            safef(comment, sizeof(comment),
                "Dropped %d tables, no size info, %llu lost tables",
                    droppedCount, lostTableCount);
        verbose(2,"# %s\n", comment);
        hgHistoryComment(conn, "%s", comment);
        }
    else
        {
        safef(comment, sizeof(comment),
            "Dropped no tables, none expired, %llu lost tables",
                lostTableCount);
        verbose(2,"# %s\n", comment);
        }
    }
else
    {
    /* Dry run: only report what would have been dropped. */
    char comment[256];
    if (expiredTableNames)
        {
        int droppedCount = slCount(expiredTableNames);
        if (tableStatus)
            safef(comment, sizeof(comment), "Would have dropped %d tables with "
                "total size %llu, %llu lost tables",
                    droppedCount, totalSize, lostTableCount);
        else
            safef(comment, sizeof(comment),
                "Would have dropped %d tables, no size info, %llu lost tables",
                    droppedCount, lostTableCount);
        verbose(2,"# %s\n", comment);
        }
    else
        {
        safef(comment, sizeof(comment),
            "Would have dropped no tables, none expired, %llu lost tables",
                lostTableCount);
        verbose(2,"# %s\n", comment);
        }
    }
sqlDisconnect(&conn);
}
Exemplo n.º 26
0
struct hash *makeMotifBed(char *gffDir, char *outBed)
/* Make bed file from GFFs.  Return hash of transcription factors. */
{
static char *consLevelPath[3] = {"3", "2", "0"};
static char *consLevelBed[3] = {"2", "1", "0"};
static char *pLevelPath[3] = {"p001b", "p005b", "nobind"};
static char *pLevelBed[3] = {"good", "weak", "none"};
int cIx, pIx;
FILE *f = mustOpen(outBed, "w");
struct hash *tfHash = newHash(0);
struct hash *yrcHash = newHash(18);
struct yrc *yrcList = NULL, *yrc;

for (cIx=0; cIx<3; ++cIx)
   {
   for (pIx=0; pIx<3; ++pIx)
       {
       struct lineFile *lf;
       char *row[10];
       char fileName[PATH_LEN];
       char hashKey[256];

       safef(fileName, sizeof(fileName), "%s/IGR_v24.%s.%s.GFF",
       	   gffDir, consLevelPath[cIx], pLevelPath[pIx]);
       lf = lineFileOpen(fileName, TRUE);
       while (lineFileRow(lf, row))
            {
	    char *name = row[9];
	    char *e;
	    int chromIx, chromStart, chromEnd;
	    if (!sameWord(row[8], "Site"))
	        errAbort("Expecting 'Site' line %d of %s", lf->lineIx, lf->fileName);
	    e = strchr(name, ';');
	    if (e == NULL)
	        errAbort("Expecting semicolon line %d of %s", lf->lineIx, lf->fileName);
	    *e = 0;
	    chromIx = romanToArabicChrom(row[0], lf);
	    chromStart = lineFileNeedNum(lf, row, 3);
	    chromEnd = lineFileNeedNum(lf, row, 4);
	    safef(hashKey, sizeof(hashKey), "%s.%d.%d", name, chromIx, chromStart);
	    if ((yrc = hashFindVal(yrcHash, hashKey)) == NULL)
	        {
		AllocVar(yrc);
		yrc->chromIx= chromIx;
		yrc->chromStart = chromStart;
		yrc->chromEnd = chromEnd;
		yrc->name = hashStoreName(tfHash, name);
		yrc->pLevel = pIx;
		yrc->consLevel = cIx;
		hashAdd(yrcHash, hashKey, yrc);
		slAddHead(&yrcList, yrc);
		}
	    else
	        {
		if (pIx < yrc->pLevel)
		    yrc->pLevel = pIx;
		if (cIx < yrc->consLevel)
		    yrc->consLevel = cIx;
		}
	    }
       lineFileClose(&lf);
       }
   }
for (yrc = yrcList; yrc != NULL; yrc = yrc->next)
    {
    fprintf(f, "chr%d\t", yrc->chromIx+1);
    fprintf(f, "%d\t", yrc->chromStart);
    fprintf(f, "%d\t", yrc->chromEnd);
    fprintf(f, "%s\t", yrc->name);
    fprintf(f, "%d\t", (int)(1000/(yrc->pLevel + yrc->consLevel + 1)));
    fprintf(f, "%s\t", pLevelBed[yrc->pLevel]);
    fprintf(f, "%s\n", consLevelBed[yrc->consLevel]);
    }
carefulClose(&f);
hashFree(&yrcHash);
return tfHash;
}
Exemplo n.º 27
0
void testOutSequence(struct htmlPage *tablePage, struct htmlForm *mainForm,
     char *org, char *db, char *group, char *track, char *table, 
     int expectedRows)
/* Get as sequence and make sure count agrees with expected.
 * Sets the output type to "sequence", submits tablePage (retrying up to
 * MAX_ATTEMPTS more times if no page comes back) and, if the resulting page
 * offers the gene sequence type variable, checks the protein, mRNA and
 * genomic outputs with checkFaOutput.  Problems fetching a page are
 * recorded as soft errors on tablesTestList->status.
 * mainForm is not used. */
{
struct htmlPage *outPage;
int attempts = 0;
struct htmlFormVar *typeVar;

if (tablePage->forms == NULL) 
    errAbort("testOutSequence: Missing form (tablePage)");

htmlPageSetVar(tablePage, NULL, hgtaOutputType, "sequence");
outPage = quickSubmit(tablePage, org, db, group, track, table,
    "seqUi1", hgtaDoTopSubmit, "submit");
while (outPage == NULL && attempts < MAX_ATTEMPTS) 
    {
    printf("testOutSequence: trying again to get seqUi1\n");
    outPage = quickSubmit(tablePage, org, db, group, track, table,
        "seqUi1", hgtaDoTopSubmit, "submit");
    attempts++;
    }
if (outPage == NULL) 
    {
    qaStatusSoftError(tablesTestList->status,
        "Error in testOutSequence - couldn't get outPage");
    return;
    }
if (outPage->forms == NULL)
    {
    qaStatusSoftError(tablesTestList->status,
        "Error in testOutSequence - missing form");
    htmlPageFree(&outPage);
    return;
    }

/* Since some genomic sequence things are huge, this will
 * only test in case where it's a gene prediction. */
typeVar = htmlFormVarGet(outPage->forms, hgtaGeneSeqType);
if (typeVar != NULL)
    {
    struct htmlPage *seqPage;
    static char *types[] = {"protein", "mRNA"};
    int i;
    for (i=0; i<ArraySize(types); ++i)
        {
        char *type = types[i];
        /* Only request the sequence types this table actually offers. */
        if (slNameInList(typeVar->values, type))
             {
             struct htmlPage *page;
             char testName[128];
             htmlPageSetVar(outPage, NULL, hgtaGeneSeqType, type);
             safef(testName, sizeof(testName), "%sSeq", type);
             page = quickSubmit(outPage, org, db, group, track, table,
                testName, hgtaDoGenePredSequence, "submit");
             checkFaOutput(page, expectedRows, TRUE);
             htmlPageFree(&page);
             }
         }
    htmlPageSetVar(outPage, NULL, hgtaGeneSeqType, "genomic");
    serialSubmit(&outPage, org, db, group, track, table, "seqUi2", hgtaDoGenePredSequence, "submit");
    /* serialSubmit replaces outPage through the pointer; make sure a usable
     * page came back before looking at its forms. */
    if (outPage == NULL)
        {
        qaStatusSoftError(tablesTestList->status,
            "Error in testOutSequence - couldn't get seqUi2 page");
        return;
        }
    if (outPage->forms == NULL)
        {
        qaStatusSoftError(tablesTestList->status,
            "Error in testOutSequence - missing form (seqUi2)");
        htmlPageFree(&outPage);
        return;
        }

    /* On genomic page uncheck intron if it's there, then get results
     * and count them. */
    if (htmlFormVarGet(outPage->forms, "hgSeq.intron") != NULL)
         htmlPageSetVar(outPage, NULL, "hgSeq.intron", NULL);
    seqPage = quickSubmit(outPage, org, db, group, track, table, "genomicSeq", hgtaDoGenomicDna, "submit");
    /* NOTE(review): seqPage may be NULL here; it is passed on unchecked just
     * as the protein/mRNA pages are above - confirm checkFaOutput copes. */
    checkFaOutput(seqPage, expectedRows, FALSE);
    htmlPageFree(&seqPage);
    }

htmlPageFree(&outPage);
}
static void pslMapAliRun(char *cmd)
/* Log cmd at verbosity 1, run it through the shell and abort if it does
 * not exit with status 0. */
{
int status;
verbose(1,"%s\n",cmd);
status = system(cmd);
if (status != 0)
    errAbort("command failed with status %d: %s", status, cmd);
}

static void doPslMapAli(struct sqlConnection *conn, 
    int taxon, char *db, 
    int fromTaxon, char *fromDb)
/* Map probe alignments from fromDb onto db using the liftOver chain
 * /gbdb/<fromDb>/liftOver/<fromDb>To<Db>.over.chain.gz, rescore them with
 * pslRecalcMatch against db's nib directory or .2bit file, filter the bac
 * alignments with pslCDnaFilter, and leave the combined result in
 * vgPrbPslMap.psl in the current directory.  Intermediate files
 * (unscoredNB.psl, unscoredB.psl, bacTemp.psl, nonBac.psl, bac.psl) are
 * also written there.  Aborts if the sequence or chain file is missing or
 * if any of the shell commands fails, since each step consumes the output
 * of the one before.  taxon is not used. */
{
/* The commands embed path/dnaPath (up to 256 bytes each) plus fixed text,
 * so the command buffer needs to be larger than either of them. */
char cmd[1024];
char path[256];
char dnaPath[256];
char toDb[12];

/* Chain file names use the target db with its first letter capitalized. */
safef(toDb,sizeof(toDb),"%s", db);
toDb[0]=toupper((unsigned char)toDb[0]);

safef(dnaPath,sizeof(dnaPath),"/cluster/data/%s/nib", db);
if (!fileExists(dnaPath))
    {
    safef(dnaPath,sizeof(dnaPath),"/cluster/data/%s/%s.2bit", db, db);
    if (!fileExists(dnaPath))
        errAbort("unable to locate nib dir or .2bit for %s: %s", db, dnaPath);
    }
    
safef(path,sizeof(path),"/gbdb/%s/liftOver/%sTo%s.over.chain.gz", fromDb, fromDb, toDb);
if (!fileExists(path))
    errAbort("unable to locate chain file %s",path);

/* get non-bac $db.vgProbes not yet aligned */
getPslMapAli(conn, db, fromTaxon, fromDb, FALSE);
/* get bac $db.vgProbes not yet aligned */
getPslMapAli(conn, db, fromTaxon, fromDb, TRUE);
/* get .fa for pslRecalcMatch use */
getPslMapFa(conn, db, fromTaxon);

/* non-bac */
safef(cmd,sizeof(cmd),
"zcat %s | pslMap -chainMapFile -swapMap  nonBac.psl stdin stdout "
"|  sort -k 14,14 -k 16,16n > unscoredNB.psl"
,path);
pslMapAliRun(cmd);

safef(cmd,sizeof(cmd),
"pslRecalcMatch unscoredNB.psl %s" 
" pslMap.fa nonBac.psl"
,dnaPath);
pslMapAliRun(cmd);

/* bac */
safef(cmd,sizeof(cmd),
"zcat %s | pslMap -chainMapFile -swapMap  bac.psl stdin stdout "
"|  sort -k 14,14 -k 16,16n > unscoredB.psl"
,path);
pslMapAliRun(cmd);

safef(cmd,sizeof(cmd),
"pslRecalcMatch unscoredB.psl %s" 
" pslMap.fa bacTemp.psl"
,dnaPath);
pslMapAliRun(cmd);

safef(cmd,sizeof(cmd),
"pslCDnaFilter -globalNearBest=0.00001 -minCover=0.05"
" bacTemp.psl bac.psl");
pslMapAliRun(cmd);

safef(cmd,sizeof(cmd),"cat bac.psl nonBac.psl > vgPrbPslMap.psl");
pslMapAliRun(cmd);

}
int splatCheck1(char *inFa, char *inSplat, char *outMiss, char *outWrong)
/* splatCheck1 - Check that all the test set really is being covered.
 * Read names are expected to look like [RC_]<a>_<start>_<c>: the middle part
 * is the expected start and an RC_ prefix means the minus strand.  A read
 * counts as good if some line of inSplat places it on the expected strand
 * within 2 bases of the expected start.
 * Names from inFa that never appear in inSplat are written to outMiss,
 * names that appear but are never good are written to outWrong.
 * Returns minus the number of missing plus wrong reads (0 when all good). */
{
struct lineFile *lf = lineFileOpen(inSplat, TRUE);
FILE *missF = mustOpen(outMiss, "w");
FILE *badF = mustOpen(outWrong, "w");
char *row[7];
struct hash *allHash = hashFaNames(inFa);
struct hash *mappedHash = hashNew(0);   /* Keep track of reads we've seen here. */
struct hash *goodHash = hashNew(0);     /* Keep track of good reads here. */
/* NOTE(review): lf and the three hashes are not released anywhere in this
 * function. */
while (lineFileRow(lf, row))
    {
    /* Read in line and parse it, track it. */
    int chromStart = sqlUnsigned(row[1]);
    char *strand = row[5];
    char *name = row[6];
    hashStore(mappedHash, name);

    /* Parse out name field to figure out where we expect it to map. */
    char *pt = name;
    char *expectStrand = "+";
    if (startsWith("RC_", pt))
        {
        pt += 3;
        expectStrand = "-";
        }
    /* Chop a copy, since name itself is still needed as the hash key. */
    char *nameCopy = cloneString(pt);
    char *parts[4];
    int partCount = chopByChar(nameCopy, '_', parts, ArraySize(parts));
    if (partCount != 3)
        errAbort("Can't parse name field line %d of %s", lf->lineIx, lf->fileName);
    int expectStart = sqlUnsigned(parts[1]);
    if (sameString(strand, expectStrand))
        {
        int diff = intAbs(chromStart - expectStart);
        if (diff <= 2)
            {
            hashStore(goodHash, name);
            }
        }
    freeMem(nameCopy);
    }

struct hashEl *hel, *helList = hashElListHash(allHash);
int allCount = allHash->elCount;
int missCount = 0, badCount = 0;
for (hel = helList; hel != NULL; hel = hel->next)
    {
    char *name = hel->name;
    if (!hashLookup(mappedHash, name))
        {
        fprintf(missF, "%s\n", name);
        ++missCount;
        }
    else
        {
        if (!hashLookup(goodHash, hel->name))
            {
            fprintf(badF, "%s\n", hel->name);
            ++badCount;
            }
        }
     
    }
carefulClose(&badF);
carefulClose(&missF);

/* With no reads at all the percentages would be 0/0; report 0 instead. */
double missPercent = 0.0, badPercent = 0.0;
if (allCount > 0)
    {
    missPercent = 100.0*missCount/allCount;
    badPercent = 100.0*badCount/allCount;
    }
verbose(1, "Total reads %d\n", allCount);
verbose(1, "Unmapped %d (%5.2f%%)\n", missCount, missPercent);
verbose(1, "Mapped wrong %d (%5.2f%%)\n", badCount, badPercent);
return -(missCount + badCount);
}
Exemplo n.º 30
0
void pslSort2(char *outFile, char *tempDir)
/* Do second step of sort - merge all sorted files in tempDir
 * to final.
 * Opens every tmp*.psl file in tempDir, repeatedly writes to outFile the
 * record that pslCmpQuery orders first among the current heads of those
 * files, and finally removes the temp files.  A psl header is written
 * first unless nohead is set.  Aborts if there are no temp files, if a
 * temp file path does not fit the name buffer, or if outFile cannot be
 * closed cleanly. */
{
char fileName[512];
struct slName *tmpList, *tmp;
struct midFile *midList = NULL, *mid;
int aliCount = 0;
int nameLen;
FILE *f = mustOpen(outFile, "w");


if (!nohead)
    pslWriteHead(f);
tmpList = listDir(tempDir, "tmp*.psl");
if (tmpList == NULL)
    errAbort("No tmp*.psl files in %s\n", tempDir);
for (tmp = tmpList; tmp != NULL; tmp = tmp->next)
    {
    /* Bounded write: refuse paths that would not fit rather than
     * overflowing fileName. */
    nameLen = snprintf(fileName, sizeof(fileName), "%s/%s", tempDir, tmp->name);
    if (nameLen < 0 || nameLen >= (int)sizeof(fileName))
        errAbort("File name too long: %s/%s", tempDir, tmp->name);
    AllocVar(mid);
    mid->lf = pslFileOpen(fileName);
    slAddHead(&midList, mid);
    }
verbose(1, "writing %s", outFile);
fflush(stdout);
/* Write out the lowest sorting line from mid list until done. */
for (;;)
    {
    struct midFile *bestMid = NULL;
    if ( (++aliCount & 0xffff) == 0)
        {
        verboseDot();
        fflush(stdout);
        }
    for (mid = midList; mid != NULL; mid = mid->next)
        {
        /* Refill the head record of any file whose previous one was
         * consumed; close the file once it runs dry. */
        if (mid->lf != NULL && mid->psl == NULL)
            {
            if ((mid->psl = nextPsl(mid->lf)) == NULL)
                lineFileClose(&mid->lf);
            }
        if (mid->psl != NULL)
            {
            if (bestMid == NULL || pslCmpQuery(&mid->psl, &bestMid->psl) < 0)
                bestMid = mid;
            }
        }
    if (bestMid == NULL)
        break;
    pslTabOut(bestMid->psl, f);
    pslFree(&bestMid->psl);
    }
printf("\n");
/* Buffered write errors surface at close time; don't go on to delete
 * the temp files if the merged output is incomplete. */
if (fclose(f) != 0)
    errAbort("Error closing %s", outFile);

/* The following really shouldn't be necessary.... */
for (mid = midList; mid != NULL; mid = mid->next)
    lineFileClose(&mid->lf);

printf("Cleaning up temp files\n");
for (tmp = tmpList; tmp != NULL; tmp = tmp->next)
    {
    /* Same names as were checked for length when the files were opened. */
    snprintf(fileName, sizeof(fileName), "%s/%s", tempDir, tmp->name);
    remove(fileName);
    }
}