void hgGoldGapGl(char *database, char *gsDir, char *ooSubDir, boolean doGl, char *oneChrom)
/* hgGoldGapGl - Put chromosome .agp and .gl files into browser database.
 *
 * database - database to connect to (no connection is made when noLoad is set).
 * gsDir    - root directory; <gsDir>/ffa/sequence.inf is read when doGl is set.
 * ooSubDir - subdirectory of gsDir that is scanned for chromosome directories.
 * doGl     - when TRUE, makeGl() is also run on each chromosome directory.
 * oneChrom - if non-NULL, only the directory matching this name is processed;
 *            a leading "chr" or "target" prefix is stripped before comparing.
 *
 * A directory is treated as a chromosome directory when its name is at most
 * two characters long, starts with "NA_", or is listed in the file given by
 * the -chromLst option.  Aborts if no directory was processed. */
{
    struct fileInfo *chrFiList, *chrFi;
    struct sqlConnection *conn = NULL;
    char ooDir[512];
    char pathName[512];
    struct hash *cloneVerHash = newHash(0);
    boolean gotAny = FALSE;
    struct hash *chromDirHash = newHash(4);
    char *chromLst = optionVal("chromLst", NULL);

    if (! noLoad)
        conn = sqlConnect(database);

    verbose(2,"#\tcomplete gold, gap and .gl files produced\n");

    /* Optional list of extra directory names to accept as chromosomes. */
    if (chromLst != NULL)
    {
        struct lineFile *clf = lineFileOpen(chromLst, TRUE);
        char *row[1];
        while (lineFileRow(clf, row))
        {
            hashAdd(chromDirHash, row[0], NULL);
        }
        lineFileClose(&clf);
    }

    /* safef aborts instead of silently overrunning the fixed-size buffer. */
    safef(ooDir, sizeof(ooDir), "%s/%s", gsDir, ooSubDir);

    /* Strip the whole prefix that matched; target prefix is used in zoo
     * browser and is six characters long, not three. */
    if (oneChrom != NULL)
    {
        if (startsWith("chr", oneChrom))
            oneChrom += strlen("chr");
        else if (startsWith("target", oneChrom))
            oneChrom += strlen("target");
    }

    if (doGl)
    {
        safef(pathName, sizeof(pathName), "%s/ffa/sequence.inf", gsDir);
        makeCloneVerHash(pathName, cloneVerHash);
    }

    chrFiList = listDirX(ooDir, "*", FALSE);
    for (chrFi = chrFiList; chrFi != NULL; chrFi = chrFi->next)
    {
        if (chrFi->isDir &&
                ((strlen(chrFi->name) <= 2) || startsWith("NA_", chrFi->name) ||
                 (NULL != hashLookup(chromDirHash, chrFi->name))))
        {
            if (oneChrom == NULL || sameWord(chrFi->name, oneChrom))
            {
                safef(pathName, sizeof(pathName), "%s/%s", ooDir, chrFi->name);
                makeGoldAndGap(conn, pathName);
                if (doGl)
                    makeGl(conn, pathName, cloneVerHash);
                gotAny = TRUE;
                uglyf("done %s\n", chrFi->name);
            }
        }
    }
    slFreeList(&chrFiList);
    if (! noLoad)
        sqlDisconnect(&conn);
    hashFree(&chromDirHash);
    hashFree(&cloneVerHash);
    if (!gotAny)
        errAbort("No contig agp and gold files found");
}
Exemple #2
0
void knownToVisiGene(char *database)
/* knownToVisiGene - Create knownToVisiGene table by riffling through various other knownTo tables.
 *
 * database - database holding knownGene and the knownTo* / kg* tables; the
 *            output table (global outTable) is created and loaded there.
 *
 * Images are read from the global visiDb, probe tables from the database named
 * by the -probesDb option (default: database).  For every known gene the first
 * hit in this order is written to the tab file: LocusLink, RefSeq, GenBank,
 * gene name, vgProbes, vgAllProbes. */
{
char *tempDir = ".";
FILE *f = hgCreateTabFile(tempDir, outTable);
struct sqlConnection *hConn = sqlConnect(database);
struct sqlConnection *iConn = sqlConnect(visiDb);
struct sqlResult *sr;
char **row;
struct hash *geneImageHash = newHash(18);
struct hash *locusLinkImageHash = newHash(18);
struct hash *refSeqImageHash = newHash(18);
struct hash *genbankImageHash = newHash(18);
struct hash *probeImageHash = newHash(18);
struct hash *knownToLocusLinkHash = newHash(18);
struct hash *knownToRefSeqHash = newHash(18);
struct hash *knownToGeneHash = newHash(18);
struct hash *favorHugoHash = newHash(18);
struct hash *knownToProbeHash = newHash(18);
struct hash *knownToAllProbeHash = newHash(18);
struct genePred *knownList = NULL, *known;
struct hash *dupeHash = newHash(17);


probesDb  = optionVal("probesDb", database);
struct sqlConnection *probesConn = sqlConnect(probesDb);
vgProbes = sqlTableExists(probesConn,"vgProbes");
vgAllProbes = sqlTableExists(probesConn,"vgAllProbes");

/* Go through and make up hashes of images keyed by various fields. */
sr = sqlGetResult(iConn,
        "NOSQLINJ select image.id,imageFile.priority,gene.name,gene.locusLink,gene.refSeq,gene.genbank"
	",probe.id,submissionSet.privateUser,vgPrbMap.vgPrb,gene.id"
	" from image,imageFile,imageProbe,probe,gene,submissionSet,vgPrbMap"
	" where image.imageFile = imageFile.id"
	" and image.id = imageProbe.image"
	" and imageProbe.probe = probe.id"
	" and probe.gene = gene.id"
	" and image.submissionSet=submissionSet.id"
	" and vgPrbMap.probe = probe.id");

while ((row = sqlNextRow(sr)) != NULL)
    {
    int id = sqlUnsigned(row[0]);
    float priority = atof(row[1]);
    int privateUser = sqlSigned(row[7]);
    char vgPrb_Id[256];
    safef(vgPrb_Id, sizeof(vgPrb_Id), "vgPrb_%s",row[8]);
    int geneId = sqlUnsigned(row[9]);
    /* Only rows whose submission set has privateUser 0 are indexed. */
    if (privateUser == 0)
	{
	addPrioritizedImage(probeImageHash, id, priority, geneId, vgPrb_Id);
	addPrioritizedImage(geneImageHash, id, priority, geneId, row[2]);
	addPrioritizedImage(locusLinkImageHash, id, priority, geneId, row[3]);
	addPrioritizedImage(refSeqImageHash, id, priority, geneId, row[4]);
	addPrioritizedImage(genbankImageHash, id, priority, geneId, row[5]);
	}
    }
verbose(2, "Made hashes of image: geneImageHash %d, locusLinkImageHash %d, refSeqImageHash %d"
           ", genbankImageHash %d probeImageHash %d\n", 
            geneImageHash->elCount, locusLinkImageHash->elCount, refSeqImageHash->elCount, 
	    genbankImageHash->elCount, probeImageHash->elCount);
sqlFreeResult(&sr);

/* Build up list of known genes, keeping only the first record per name. */
sr = sqlGetResult(hConn, "NOSQLINJ select * from knownGene");
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct genePred *gp = genePredLoad(row);
    if (!hashLookup(dupeHash, gp->name))
        {
	hashAdd(dupeHash, gp->name, NULL);
	slAddHead(&knownList, gp);
	}
    else
	{
	/* Duplicate name: record is not kept anywhere, so release it. */
	genePredFree(&gp);
	}
    }
slReverse(&knownList);
sqlFreeResult(&sr);
verbose(2, "Got %d known genes\n", slCount(knownList));

/* Build up hashes from knownGene to other things. */
if (vgProbes)
    bestProbeOverlap(probesConn, "vgProbes", knownList, knownToProbeHash);
if (vgAllProbes)
    bestProbeOverlap(probesConn, "vgAllProbes", knownList, knownToAllProbeHash);

foldIntoHash(hConn, "knownToLocusLink", "name", "value", knownToLocusLinkHash, NULL, FALSE);
foldIntoHash(hConn, "knownToRefSeq", "name", "value", knownToRefSeqHash, NULL, FALSE);
foldIntoHash(hConn, "kgXref", "kgID", "geneSymbol", knownToGeneHash, favorHugoHash, FALSE);
foldIntoHash(hConn, "kgAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE);
foldIntoHash(hConn, "kgProtAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE);

verbose(2, "knownToLocusLink %d, knownToRefSeq %d, knownToGene %d knownToProbe %d knownToAllProbe %d\n", 
   knownToLocusLinkHash->elCount, knownToRefSeqHash->elCount, knownToGeneHash->elCount,
   knownToProbeHash->elCount, knownToAllProbeHash->elCount);

/* Try and find an image for each gene; the first source that yields one wins. */
for (known = knownList; known != NULL; known = known->next)
    {
    char *name = known->name;
    struct prioritizedImage *best = NULL;

    best = bestImage(name, knownToLocusLinkHash, locusLinkImageHash);
    if (!best)
	best = bestImage(name, knownToRefSeqHash, refSeqImageHash);
    if (!best)
	best = hashFindVal(genbankImageHash, name);
    if (!best)
	best = bestImage(name, knownToGeneHash, geneImageHash);
    if (vgProbes && !best)
	best = bestImage(name, knownToProbeHash, probeImageHash);
    if (vgAllProbes && !best)
	best = bestImage(name, knownToAllProbeHash, probeImageHash);

    if (best)
        {
	fprintf(f, "%s\t%d\t%d\n", name, best->imageId, best->geneId);
	}
    }

createTable(hConn, outTable);
hgLoadTabFile(hConn, tempDir, outTable, &f);
hgRemoveTabFile(tempDir, outTable);

/* Release the database connections opened above. */
sqlDisconnect(&probesConn);
sqlDisconnect(&iConn);
sqlDisconnect(&hConn);
}
Exemple #3
0
static void outputKeggRows(struct sqlConnection *conn, char *keggDb, char *keggKey,
	char *kgId, char *locusID, FILE *pathwayOut, FILE *descOut)
/* For every row of keggDb.keggList whose locusID column equals keggKey, write
 * "kgId, locusID, mapID" to pathwayOut and "mapID, desc" to descOut. */
{
char query[256];
struct sqlResult *sr;
char **row;

sqlSafef(query, sizeof(query), "select * from %s.keggList where locusID = '%s'", keggDb, keggKey);
sr = sqlGetResult(conn, query);
/* Advance exactly once per iteration so that no row is skipped. */
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *mapID = row[1];
    char *desc  = row[2];
    fprintf(pathwayOut, "%s\t%s\t%s\n", kgId, locusID, mapID);
    fprintf(descOut, "%s\t%s\n", mapID, desc);
    }
sqlFreeResult(&sr);
}

int main(int argc, char *argv[])
/* Build keggPathway.tab and keggMapDesc.tab in the current directory.
 *
 * argv[1] - temporary database that holds the keggList table.
 * argv[2] - read-only genome database holding the gene table and kgXref.
 *
 * For each gene name in the gene table (option -table, default knownGene) the
 * Entrez gene id is looked up through the gene's mRNA (entrezMrna first, then
 * entrezRefseq) and all matching keggList rows are written out.  When no
 * Entrez id is found and the table is not knownGene, keggList is searched by
 * the gene name itself and the table's name2 field is reported instead. */
{
struct sqlConnection *conn, *conn3;
char query[256];
struct sqlResult *sr;
char **row;

FILE *o1, *o2;

char *locusID;	/* LocusLink ID */

char *kgTempDbName, *roDbName; 
char cond_str[200];
char *kgId;
char *mRNA;

optionInit(&argc, argv, options);
if (argc != 3)  usage();
kgTempDbName    = argv[1];
roDbName 	= argv[2];

conn = hAllocConn(roDbName);
conn3= hAllocConn(roDbName);

/* mustOpen aborts with a message rather than leaving a NULL stream. */
o1 = mustOpen("j.dat",  "w");
o2 = mustOpen("jj.dat", "w");
    
table = optionVal("table", "knownGene");
sqlSafef(query, sizeof(query), "select name from %s.%s", roDbName, table);
sr = sqlMustGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    kgId = row[0];
    locusID = NULL;
	
    sqlSafefFrag(cond_str, sizeof(cond_str), "kgId='%s'", kgId);
    mRNA = sqlGetField(roDbName, "kgXref", "mRNA", cond_str);
    
    /* Without an mRNA there is nothing to look up in Entrez. */
    if (mRNA != NULL)
	{
	sqlSafefFrag(cond_str, sizeof(cond_str), "mrna='%s'", mRNA);
	locusID = sqlGetField("entrez", "entrezMrna", "geneId", cond_str);
    
	/* look for RefSeq if not found in mRNAs */
	if (locusID == NULL)
	    {
	    sqlSafefFrag(cond_str, sizeof(cond_str), "refseq='%s'", mRNA);
	    locusID = sqlGetField("entrez", "entrezRefseq", "geneId", cond_str);
	    }
	}

    if (locusID != NULL)
	{
	outputKeggRows(conn3, kgTempDbName, locusID, kgId, locusID, o1, o2);
	}
    else if (differentString(table, "knownGene"))
	{
	/* Fall back to searching keggList by gene name, reporting name2. */
	sqlSafefFrag(cond_str, sizeof(cond_str), "name='%s'", kgId);
	locusID = sqlGetField(roDbName, table, "name2", cond_str);
	outputKeggRows(conn3, kgTempDbName, kgId, kgId, locusID, o1, o2);
	}
    }
sqlFreeResult(&sr);

carefulClose(&o1);
carefulClose(&o2);
hFreeConn(&conn3);
hFreeConn(&conn);

mustSystem("cat j.dat|sort|uniq >keggPathway.tab");
mustSystem("cat jj.dat|sort|uniq >keggMapDesc.tab");
mustSystem("rm j.dat");
mustSystem("rm jj.dat");
return(0);
}
Exemple #4
0
void altSummary(char *db, char *agxFileName, char *summaryOutName, char *htmlOutName, char *htmlFramesOutName)
/* Look through a bunch of splice sites and output some statistics and links. */
{
struct altGraphX *agList = NULL, *ag = NULL;
struct altSpliceSite *aSpliceList = NULL, *aSplice=NULL;
char *RDataName = optionVal("RData", NULL);
char *bedName = optionVal("bedName", NULL);
FILE *htmlOut = NULL;
FILE *htmlFramesOut = NULL;
FILE *summaryOut = NULL;
int altSpliceSites = 0, altSpliceLoci = 0, totalSpliceSites = 0;
warn("Loading splicing graphs.");
agList = altGraphXLoadAll(agxFileName);
htmlFramesOut = mustOpen(htmlFramesOutName, "w");
htmlOut = mustOpen(htmlOutName, "w");
summaryOut = mustOpen(summaryOutName, "w");
if(RDataName != NULL)
    {
    char buff[256];
    safef(buff, sizeof(buff), "%s.control", RDataName);
    RDataCont = mustOpen(buff, "w");
    outputRHeader(RDataCont);
    safef(buff, sizeof(buff), "%s.alt", RDataName);
    RData = mustOpen(buff, "w");
    outputRHeader(RData);
    }
if(bedName != NULL)
    {
    openBedFiles(bedName);
    }

writeOutFrames(htmlFramesOut, htmlOutName, db);
carefulClose(&htmlFramesOut);
warn("Examining splicing graphs.");
fprintf(htmlOut, "<html>\n<body bgcolor=\"#FFF9D2\"><b>Alt-Splice List</b>\n"
	"<table border=1><tr><th>Name (count)</th><th>Type</th><th>Size</th></tr>\n");
for(ag=agList; ag != NULL; ag=ag->next)
    {
    lookForAltSplicing(db, ag, &aSpliceList, &altSpliceSites, &altSpliceLoci, &totalSpliceSites);
    for(aSplice=aSpliceList; aSplice != NULL; aSplice= aSplice->next)
	{
 	altSpliceSiteOutput(aSplice, summaryOut, '\t', '\n');
	htmlLinkOut(db, aSplice, htmlOut);
	if(bedViewOutFile != NULL)
	    bedViewOut(aSplice, bedViewOutFile);
	}
    altSpliceSiteFreeList(&aSpliceList);
    }
warn("\nDone.");
fprintf(htmlOut,"</body></html>\n");
warn("%d altSpliced sites in %d alt-spliced loci out of %d total loci.",
     altSpliceSites, altSpliceLoci, slCount(agList));
printSpliceTypeInfo(altSpliceLoci);
altGraphXFreeList(&agList);
if(RData != NULL)
    {
    carefulClose(&RData);
    carefulClose(&RDataCont);
    }
carefulClose(&htmlOut);
carefulClose(&summaryOut);
}
void paraNode()
/* paraNode - a net server.  Opens the log, binds a socket on paraNodePort and
 * then handles one incoming message at a time until a "quit" command arrives.
 * When hubName is set, only messages whose sender address equals hubIp or
 * localIp are acted on; others are logged and dropped. */
{
struct sockaddr_in sai;
char *line;
char *command;

/* We have to know who we are... */
hostName = getMachine();
initRandom();
getTicksToHundreths();

/* Logging goes to a file when -log is given, otherwise to syslog. */
if (!optionExists("log"))
    logOpenSyslog("paraNode", optionVal("logFacility", NULL));
else
    logOpenFile("paraNode", optionVal("log", NULL));
logSetMinPriority(optionVal("logMinPriority", "info"));
logInfo("starting paraNode on %s", hostName);

/* Make job lists. */
jobsRunning = newDlList();
jobsFinished = newDlList();

/* Set up socket and self to listen to it. */
ZeroVar(&sai);
sai.sin_family = AF_INET;
sai.sin_port = htons(paraNodePort);
sai.sin_addr.s_addr = INADDR_ANY;
mainRudp = rudpMustOpenBound(&sai);
mainRudp->maxRetries = 12;

/* Event loop. */
findNow();
for (;;)
    {
    /* Nothing received: just try again. */
    if (!pmReceive(&pmIn, mainRudp))
	continue;
    findNow();

    /* With a hub configured, drop anything not from the hub or ourselves. */
    if (hubName != NULL
	    && ntohl(pmIn.ipAddress.sin_addr.s_addr) != hubIp
	    && ntohl(pmIn.ipAddress.sin_addr.s_addr) != localIp)
	{
	logWarn("command from unauthorized host %s",
                    paraFormatIp(ntohl(pmIn.ipAddress.sin_addr.s_addr)));
	continue;
	}

    /* First word of the message is the command, the rest its arguments. */
    line = pmIn.data;
    logDebug("message from %s: \"%s\"",
                     paraFormatIp(ntohl(pmIn.ipAddress.sin_addr.s_addr)),
                     line);
    command = nextWord(&line);
    if (command == NULL)
	{
	logDebug("done command");
	continue;
	}

    if (sameString("quit", command))
	break;

    if (sameString("run", command))
	doRun(line, &pmIn.ipAddress);
    else if (sameString("jobDone", command))
	jobDone(line);
    else if (sameString("status", command))
	doStatus();
    else if (sameString("kill", command))
	doKill(line);
    else if (sameString("check", command))
	doCheck(line, &pmIn.ipAddress);
    else if (sameString("resurrect", command))
	doResurrect(line, &pmIn.ipAddress);
    else if (sameString("listJobs", command))
	listJobs();
    else if (sameString("fetch", command))
	doFetch(line);
    else
	logWarn("invalid command: \"%s\"", command);
    logDebug("done command");
    }
rudpClose(&mainRudp);
}
void splitByCount(char *inName, int pieceSize, char *outRoot, off_t estSize, int extra)
/* Split up file into pieces pieceSize long.
 *
 * inName    - fasta file to read.
 * pieceSize - step between piece starts; must be positive.
 * outRoot   - root of output names; piece names are the file part of outRoot
 *             followed by a zero-padded piece index.
 * estSize   - estimated total size, used only to choose the number of digits.
 * extra     - overlap appended to each piece beyond pieceSize.
 *
 * Options read here: -maxN (pieces with more N bits than this are not
 * written), -oneFile (write all pieces to <outRoot>.fa), -out and -lift
 * (additional output files). */
{
off_t pieces;
int digits;
int maxN = optionInt("maxN", pieceSize-1);
boolean oneFile = optionExists("oneFile");
char fileName[PATH_LEN];
char dirOnly[PATH_LEN], noPath[128];
int pos, pieceIx = 0, writeCount = 0;
struct dnaSeq seq;
struct lineFile *lf = NULL;
FILE *f = NULL;
Bits *bits = NULL;
int seqCount = 0;
char *outFile = optionVal("out", NULL);
char *liftFile = optionVal("lift", NULL);
FILE *lift = NULL;
ZeroVar(&seq);

/* A non-positive step would divide by zero below and never advance pos. */
if (pieceSize <= 0)
    errAbort("splitByCount: pieceSize must be positive, got %d", pieceSize);
pieces = (estSize + pieceSize-1)/pieceSize;
digits = digitsBaseTen(pieces);

lf = lineFileOpen(inName, TRUE);
splitPath(outRoot, dirOnly, noPath, NULL);
if (oneFile)
    {
    safef(fileName, sizeof(fileName), "%s.fa", outRoot);
    f = mustOpen(fileName, "w");
    }
if (liftFile)
    lift = mustOpen(liftFile, "w");

/* Walk each sequence in steps of pieceSize, writing the pieces whose N count
 * is within maxN. */
while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    bits = bitAlloc(seq.size);
    setBitsN(seq.dna, seq.size, bits);
    ++seqCount;
    if (outFile != NULL)
        {
	if (seqCount > 1)
	    errAbort("Can only handle in files with one sequence using out option");
	bitsForOut(outFile, seq.size, bits);
	}
    for (pos = 0; pos < seq.size; pos += pieceSize)
        {
	/* Large enough for noPath (up to 127 chars) plus the piece index. */
	char numOut[256];
	int thisSize = seq.size - pos;
	if (thisSize > (pieceSize + extra)) 
	    thisSize = pieceSize + extra;

	if ((thisSize <= extra) && (pos > 0))
	    break;  /* nobody wants duplicate smaller than extra overhang */

	if (bitCountRange(bits, pos, thisSize) <= maxN)
	    {
	    if (!oneFile)
	        {
                mkOutPath(fileName, outRoot, digits, pieceIx);
		f = mustOpen(fileName, "w");
		}
            safef(numOut, sizeof(numOut), "%s%0*d", noPath, digits, pieceIx);
	    faWriteNext(f, numOut, seq.dna + pos, thisSize);
	    if (lift)
	        fprintf(lift, "%d\t%s\t%d\t%s\t%d\n",
		    pos, numOut, thisSize, seq.name, seq.size);
	    ++writeCount;
	    if (!oneFile)
	        carefulClose(&f);
	    }
	/* The index advances even for skipped pieces. */
        pieceIx++;
	}
    bitFree(&bits);
    }
carefulClose(&f);
carefulClose(&lift);
lineFileClose(&lf);
printf("%d pieces of %d written\n", writeCount, pieceIx);
}
Exemple #7
0
int main(int argc, char *argv[])
/* updateStsInfo entry point.  Reads the six positional arguments named in the
 * usage message, loads every input file, then writes <prefix>.info,
 * <prefix>.primers, <prefix>.alias and <prefix>.fa.  Returns 1 when too few
 * arguments are given, 0 otherwise. */
{
  struct lineFile *sif, *dsf, *daf, *gbf = NULL;
  FILE *of, *opf, *oaf, *off, *asf, *dff;
  char filename[256], *gbName;
  int verb = 0;

verboseSetLevel(0);
optionInit(&argc, argv, optionSpecs);
/* argv[1] through argv[6] are all read below, so all six must be present. */
if (argc < 7)
    {
      fprintf(stderr, "USAGE: updateStsInfo [-verbose=<level> -gb=<file>] <stsInfo file> <all.STS.fa> <dbSTS.sts> <dbSTS.aliases> <dbSTS.convert.fa> <outfile prefix>\n");
    return 1;
    }
verb = optionInt("verbose", 0);
verboseSetLevel(verb);

 gbName = optionVal("gb", NULL);
 if (gbName) 
   gbf = lineFileOpen(gbName, TRUE);
 sif = lineFileOpen(argv[1], TRUE);
 asf = mustOpen(argv[2], "r");
 dsf = lineFileOpen(argv[3], TRUE);
 daf = lineFileOpen(argv[4], TRUE);
 dff = mustOpen(argv[5], "r");

 safef(filename, ArraySize(filename), "%s.info", argv[6]);
 of = mustOpen(filename, "w");
 safef(filename, ArraySize(filename), "%s.primers", argv[6]);
 opf = mustOpen(filename, "w");
 safef(filename, ArraySize(filename), "%s.alias", argv[6]);
 oaf = mustOpen(filename, "w");
 safef(filename, ArraySize(filename), "%s.fa", argv[6]);
 off = mustOpen(filename, "w");

 /* Read in current stsInfo file */
 verbose(1, "Reading current stsInfo file: %s\n", argv[1]);
 readStsInfo(sif);

 /* Read in genbank accessions that have sequences */ 
 if (gbName)
   {
     verbose(1, "Reading genbank accession file: %s\n", gbName);
     readGbAcc(gbf);
   }

 /* Read in primer and organism information from dbSTS.sts */
 verbose(1, "Reading current dbSTS.sts file: %s\n", argv[3]);
 readDbstsPrimers(dsf);

 /* Read in names from dbSTS.alias and create new stsInfo records if needed */
 verbose(1, "Reading current dbSTS.aliases file: %s\n", argv[4]);
 readDbstsNames(daf);

 /* Read in current sequences for sts markers */
 verbose(1, "Reading current all.STS file: %s\n", argv[2]);
 readAllSts(asf);

 /* Read in new sequences from dbSTS.fa */
 verbose(1, "Reading dbSTS.fa file: %s\n", argv[5]);
 readDbstsFa(dff);

 /* Print out the new files */
 verbose(1, "Creating output files: %s .info .primers .alias .fa\n", argv[6]);
 writeOut(of, opf, oaf, off);

 /* NOTE(review): sif is not closed here, as in the original; confirm whether
  * readStsInfo() closes it before adding a lineFileClose(&sif). */
 fclose(asf);
 lineFileClose(&dsf);
 lineFileClose(&daf);
 fclose(dff);
 if (gbName)
   lineFileClose(&gbf);   
 fclose(of);
 fclose(opf);
 fclose(oaf);
 fclose(off);

 return(0);
}
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable)
/* hgExpDistance - Create table that measures expression distance between pairs.
 *
 * database - database to read from and load the result into.
 * posTable - table supplying name, expCount and expScores for each item.
 * expTable - not referenced in this function.
 * outTable - name of the table (and tab file in the current directory) to create.
 *
 * Expression rows are loaded into the global geneList, numThreads worker
 * threads running computeDistance() are started, and this thread writes the
 * result arrays it takes from the global queue synQ.  The tab file is then
 * loaded into outTable and indexed.  Aborts on inconsistent experiment counts,
 * an empty gene list, or thread errors. */
{
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char query[256];
char **row;
struct hash *expHash = hashNew(16);
int realExpCount = -1;
struct microData *gene;
int rc, t;
pthread_t *threads = NULL;
pthread_attr_t attr;
int *threadID = NULL;
void *status;
char *tempDir = ".";
int arrayNum; 
struct microDataDistance *geneDistPtr = NULL;	
struct microDataDistance *geneDistArray = NULL;	
int geneIx;
FILE *f = NULL;

/* Get list/hash of all items with expression values. */
safef(query, sizeof(query), "select name,expCount,expScores from %s", posTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *name = row[0];
    /* Only the first row for each name is used. */
    if (!hashLookup(expHash, name))
	{
	int expCount = sqlUnsigned(row[1]);
	int commaCount;
	float *expScores = NULL;

	sqlFloatDynamicArray(row[2], &expScores, &commaCount);
	if (expCount != commaCount)
	    errAbort("expCount and expScores don't match on %s in %s", name, posTable);
	if (realExpCount == -1)
	    realExpCount = expCount;
	if (expCount != realExpCount)
	    errAbort("In %s some rows have %d experiments others %d", 
	    	name, expCount, realExpCount);
	AllocVar(gene);
	gene->expCount = expCount;
	gene->expScores = expScores;
	hashAddSaveName(expHash, name, gene, &gene->name);
	slAddHead(&geneList, gene);
	}
    }
sqlFreeResult(&sr);
/* conn is still open and is reused below; opening a second connection here
 * would orphan the first one. */
slReverse(&geneList);
geneCount = slCount(geneList);
printf("Have %d elements in %s\n", geneCount, posTable);

weights = getWeights(realExpCount);

if (optionExists("lookup"))
    geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList);
geneCount = slCount(geneList);
printf("Got %d unique elements in %s\n", geneCount, posTable);

sqlDisconnect(&conn);	/* Disconnect because next step is slow. */


if (geneCount < 1)
    errAbort("ERROR: unique gene count less than one ?");

f = hgCreateTabFile(tempDir, outTable);
synQ = synQueueNew();

/* instantiate threads */
AllocArray( threadID, numThreads );
AllocArray( threads, numThreads );
pthread_attr_init( &attr );
pthread_mutex_init( &mutexDotOut, NULL );
pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );

for (t = 0; t < numThreads; t++) {
	threadID[t] = t;
	rc = pthread_create( &threads[t], &attr, computeDistance, 
						(void *) &threadID[t]);
	if (rc)
		errAbort("ERROR: in pthread_create() %d\n", rc );
} 

/* this thread will write to the file from the queue; one array is taken
 * from the queue per gene */
for (arrayNum = 0; arrayNum < geneCount; arrayNum++) {
	geneDistArray = (struct microDataDistance *)synQueueGet( synQ );
	geneDistPtr = geneDistArray;
    	/* Print out closest GENEDISTS distances in tab file. */
    	for (geneIx=0; geneIx < GENEDISTS && geneIx < geneCount; 
						++geneIx, geneDistPtr++)
		if (geneDistPtr != NULL)
			fprintf(f, "%s\t%s\t%f\n", geneDistPtr->name1, 
				geneDistPtr->name2, geneDistPtr->distance);
		else
			errAbort("ERROR: writing distance %d to file\n", 
							geneIx);
	freeMem( geneDistArray );
}

/* synchronize all threads */
for (t = 0; t < numThreads; t++) {
	rc = pthread_join( threads[t], &status);
	if (rc)
		errAbort("ERROR: in pthread_join() %d\n", rc );
} 

printf("Made %s.tab\n", outTable);

slFreeList( &geneList );

pthread_mutex_destroy( &mutexDotOut );
pthread_attr_destroy( &attr );

/* All workers have been joined, so their id slots are no longer referenced. */
freeMem( threads );
freeMem( threadID );

/* Create and load table. */
conn = sqlConnect(database);
distanceTableCreate(conn, outTable);
hgLoadTabFile(conn, tempDir, outTable, &f);
printf("Loaded %s\n", outTable);

/* Add indices. */
safef(query, sizeof(query), "alter table %s add index(query(12))", outTable);
sqlUpdate(conn, query);
printf("Made query index\n");
if (optionExists("targetIndex"))
    {
    safef(query, sizeof(query), "alter table %s add index(target(12))", outTable);
    sqlUpdate(conn, query);
    printf("Made target index\n");
    }

hgRemoveTabFile(tempDir, outTable);
sqlDisconnect(&conn);
}
void pickIntrons()
/** Top level routine, actually picks the introns.  Loads the orthoEval records
    named by -orthoEvalFile, makes one intron record per intron, sorts them
    with intronEvalCmp and writes up to -numPicks (default 100) introns that
    pass the filters to the html table, the bed file and the ortholog bed
    file.  Aborts when a required option is missing. */
{
int picksLeft = optionInt("numPicks", 100);
struct hash *posHash = newHash(12);
struct hash *agxHash = newHash(12);
char *htmlFileName = optionVal("htmlFile", NULL);
char *htmlFrameFileName = optionVal("htmlFrameFile", "frame.html");
char *orthoEvalFile = optionVal("orthoEvalFile", NULL);
char *db = optionVal("db", NULL);
char *bedFileName = optionVal("bedOutFile", NULL);
char *orthoBedFileName = optionVal("orthoBedOut", NULL);
FILE *htmlOut = NULL, *htmlFrameOut = NULL;
FILE *bedOut = NULL, *orthoBedOut = NULL;
struct orthoEval *ev = NULL, *evList = NULL;
struct intronEv *iv = NULL, *ivList = NULL;
struct bed *bed = NULL;
char buff[256];
int intronIx = 0;
int pickNum = 0;

if (htmlFileName == NULL || orthoEvalFile == NULL || db == NULL || 
    bedFileName == NULL || orthoBedFileName == NULL)
    errAbort("Missing parameters. Use -help for usage.");

warn("Loading orthoEvals.");
evList = orthoEvalLoadAll(orthoEvalFile);
warn("Creating intron records");
for (ev = evList; ev != NULL; ev = ev->next)
    {
    for (intronIx = 0; intronIx < ev->numIntrons; intronIx++)
	{
	occassionalDot();
	iv = intronIvForEv(ev, intronIx);
	slAddHead(&ivList, iv);
	}
    }
warn("\nDone");
warn("Sorting");
slSort(&ivList, intronEvalCmp);
warn("Done.");
htmlOut = mustOpen(htmlFileName, "w");
bedOut = mustOpen(bedFileName, "w");
htmlFrameOut = mustOpen(htmlFrameFileName, "w");
orthoBedOut = mustOpen(orthoBedFileName, "w");
fprintf(htmlOut, "<html><body><table border=1><tr><th>Num</th><th>Mouse Acc.</th><th>Score</th><th>TS Pick</th></tr>\n");
warn("Filtering");

/* buff holds "tmp" until the first pick replaces it with that pick's position. */
safef(buff, sizeof(buff), "tmp");
for (iv = ivList; iv != NULL && picksLeft > 0; iv = iv->next)
    {
    boolean twinScan;

    /* Filters are applied in this order; the first failure skips the intron. */
    if (!isUniqueCoordAndAgx(db, iv, posHash, agxHash))
	continue;
    if (iv->support != 0)
	continue;
    if (isOverlappedByRefSeq(db, iv) || isOverlappedByEst(db, iv) || isOverlappedByMRna(db, iv))
	continue;

    /* "yes" only when both exon ranges overlap the mgcTSExpPcr table. */
    twinScan = (coordOverlappedByTable(db, iv->chrom, iv->e1S, iv->e1E, "mgcTSExpPcr") &&
		coordOverlappedByTable(db, iv->chrom, iv->e2S, iv->e2E, "mgcTSExpPcr"));
    bed = bedForIv(iv);
    if (sameString(buff, "tmp"))
	safef(buff, sizeof(buff), "%s:%d-%d", bed->chrom, bed->chromStart-50, bed->chromEnd+50);
    pickNum += 1;
    fprintf(htmlOut, "<tr><td>%d</td><td><a target=\"browser\" "
	    "href=\"http://mgc.cse.ucsc.edu/cgi-bin/hgTracks?db=hg15&position=%s:%d-%d\"> "
	    "%s </a></td><td>%d</td><td>%s</td></tr>\n", 
	    pickNum, bed->chrom, bed->chromStart-50, bed->chromEnd+50, bed->name, bed->score, 
	    twinScan ? "yes" : "no");

    bedTabOutN(bed, 12, bedOut);
    bedTabOutN(iv->ev->orthoBed, 12, orthoBedOut);
    bedFree(&bed);
    picksLeft--;
    }
writeOutFrames(htmlFrameOut, htmlFileName, db, bedFileName, buff);
fprintf(htmlOut, "</table></body></html>\n");
carefulClose(&bedOut);
carefulClose(&htmlOut);
carefulClose(&htmlFrameOut);
carefulClose(&orthoBedOut);
warn("Done.");
hashFree(&posHash);
hashFree(&agxHash);
}
static void outputProbeSetBeds(struct bed *bedList, struct slName *expNames,
	FILE *bedOut, FILE *toDiffOut)
/** Finalize the score of every bed in bedList and write it to bedOut, and to
    toDiffOut as well when that file is open. */
{
struct bed *b = NULL;
int i = 0;
for(b = bedList; b != NULL; b = b->next)
    {
    /* Count the experiments whose score is not the -10000 placeholder. */
    int avgCount = 0;
    for(i = 0; i < b->expCount; i++)
	if(b->expScores[i] != -10000)
	    avgCount++;
    if(avgCount != 0 && b->score > 0)
	b->score = log(b->score / avgCount) * 100;
    else
	b->score = 0;
    bedTabOutN(b, 15, bedOut);
    if(toDiffOut != NULL)
	outputToDiffRecord(b, expNames, toDiffOut);
    }
}

void outputBedsFromPsls(struct hash *pslHash,char *bedOutName, char *expRecordOutName, 
			char *affyFileName, char *expFileName)
/** For each set of entries in affyFile find matching psl and create a bed.
    pslHash          - psl lists keyed by probe set name.
    bedOutName       - bed output file.
    expRecordOutName - file receiving one line per experiment name.
    affyFileName     - tab-separated input; column 0 is the probe set, column 1
                       the experiment name, columns 2 and 3 are numeric or "NaN".
    expFileName      - passed to fillInExpHash() to get the experiment names.
    Consecutive lines with the same probe set are accumulated into the beds
    built from that probe set's psls.  The -toDiffFile option names an extra
    output file. */
{
struct bed *bed = NULL, *b=NULL;
struct psl *pslList = NULL, *psl = NULL;
struct hash *expHash = NULL;
int numExps = 0;
int expCount = 0;
int i =0;
char *probeSet = NULL;
char *row[4];
char key[128];
struct slName *expNames = NULL, *name = NULL;
FILE *bedOut = NULL;
FILE *expRecordOut = NULL;
char *toDiffFileName = optionVal("toDiffFile", NULL);
FILE *toDiffOut = NULL;
struct lineFile *lf = NULL;
fillInExpHash(expFileName, &expHash, &expNames, &expCount);
lf = lineFileOpen(affyFileName, TRUE);
bedOut = mustOpen(bedOutName, "w");
if(toDiffFileName != NULL)
    toDiffOut = mustOpen(toDiffFileName, "w");

/* Loop through either adding experiments to beds or if new
   probeset create bed from psl and start over.  The word limit is the
   element count of row, not its size in bytes. */
while(lineFileChopNextTab(lf, row, ArraySize(row)))
    {
    /* Do we have to make a new bed? */
    if(probeSet == NULL || differentWord(probeSet, row[0]))
	{
	occassionalDot();
	numExps = 0;
	/* If we have probeset print out the current beds. */
	if(probeSet != NULL)
	    outputProbeSetBeds(bed, expNames, bedOut, toDiffOut);
	bedFreeList(&bed);
	/* Lookup key in pslHash to find list of psl. */
	safef(key, sizeof(key), "%s", row[0]);
	pslList = hashFindVal(pslHash, key);
	/* Can have multiple psls. */
	for(psl = pslList; psl != NULL; psl = psl->next)
	    {
	    b = bedFromPsl(psl);
	    AllocArray(b->expIds, expCount );
	    AllocArray(b->expScores, expCount);
	    b->expCount = expCount;
	    initBedScores(b, expCount);
	    slAddHead(&bed, b);
	    }
	}
    if(bed != NULL)
	{
	/* More lines for this probe set than there are experiments is an error. */
	if(numExps > expCount)
	    {
	    errAbort("Supposed to be %d experiments but probeset %s has at least %d",
		     expCount, bed->name, numExps);
	    }
	for(b = bed; b != NULL; b = b->next)
	    {
	    int expIx = hashIntVal(expHash, row[1]);
	    if(differentWord(row[3], "NaN"))
	       b->expScores[expIx] = atof(row[3]);
	    if(differentWord(row[2], "NaN"))
	       b->score += atof(row[2]);
	    }
	numExps++;
	}
    freez(&probeSet);
    probeSet = cloneString(row[0]);
    }

/* The loop only writes beds when the next probe set starts, so the beds of
   the final probe set still have to be written here. */
outputProbeSetBeds(bed, expNames, bedOut, toDiffOut);
bedFreeList(&bed);
freez(&probeSet);

expRecordOut = mustOpen(expRecordOutName, "w");
i = 0;
for(name = expNames; name != NULL; name = name->next)
    {
    subChar(name->name, ',', '_');	    
    subChar(name->name, ' ', '_');
    fprintf(expRecordOut, "%d\t%s\tuclaExp\tuclaExp\tuclaExp\tuclaExp\t1\t%s,\n", i++, name->name, name->name);
    }
hashFree(&expHash);
slFreeList(&expNames);
carefulClose(&expRecordOut);
carefulClose(&toDiffOut);
carefulClose(&bedOut);
lineFileClose(&lf);
}
Exemple #11
0
void featureBits(char *database, int tableCount, char *tables[])
/* featureBits - Correlate tables via bitmap projections and booleans.
 *
 * database   - database holding the tables.
 * tableCount - number of entries in tables.
 * tables     - table specifications to combine.
 *
 * Output files are selected with the -bed, -fa, -bin, -bedRegionIn and
 * -bedRegionOut options; the last two must be given together.  With -fa and
 * without -faMerge only a single table is accepted and sequence is written
 * per item; otherwise every included chromosome is processed with
 * chromFeatureBits() and a summary line is printed to stderr. */
{
struct sqlConnection *conn = NULL;
char *bedName = optionVal("bed", NULL), *faName = optionVal("fa", NULL);
char *binName = optionVal("bin", NULL);
char *bedRegionInName = optionVal("bedRegionIn", NULL);
char *bedRegionOutName = optionVal("bedRegionOut", NULL);
FILE *bedFile = NULL, *faFile = NULL, *binFile = NULL;
FILE *bedRegionOutFile = NULL;
struct bed *bedRegionList = NULL;
boolean faIndependent = FALSE;
struct chromInfo *cInfo;

if (bedName)
    bedFile = mustOpen(bedName, "w");
if (binName)
    binFile = mustOpen(binName, "w");
if ((bedRegionInName && !bedRegionOutName) || (!bedRegionInName && bedRegionOutName))
    errAbort("bedRegionIn and bedRegionOut must both be specified");
if (faName)
    {
    boolean faMerge = optionExists("faMerge");
    faFile = mustOpen(faName, "w");
    if (tableCount > 1)
        {
	if (!faMerge)
	    errAbort("For fa output of multiple tables you must use the "
	             "faMerge option");
	}
    faIndependent = (!faMerge);
    }

if (chromSizes != NULL)
    chromInfoList = chromInfoLoadAll(chromSizes);
else
    chromInfoList = fbCreateChromInfoList(clChrom, database);

/* No connection is allocated when countGaps is set; conn stays NULL. */
if (!countGaps)
    conn = hAllocConn(database);
checkInputExists(conn, database, chromInfoList, tableCount, tables);

if (!faIndependent)
    {
    double totalBases = 0, totalBits = 0;
    int firstTableBits = 0, secondTableBits = 0;
    int *pFirstTableBits = NULL, *pSecondTableBits = NULL;
    double totalFirstBits = 0, totalSecondBits = 0;
    static int dotClock = 1;

    /* Per-table counts are only requested when enrichment is wanted. */
    if (calcEnrichment)
        {
	pFirstTableBits = &firstTableBits;
	pSecondTableBits = &secondTableBits;
	}
    if (bedRegionInName)
	{
	struct lineFile *lf = lineFileOpen(bedRegionInName, TRUE);
	struct bed *bed;
	char *row[3];
	
	bedRegionOutFile = mustOpen(bedRegionOutName, "w");
	while (lineFileRow(lf, row))
	    {
	    /* Skip comment and header lines. */
	    if (startsWith(row[0],"#")||startsWith(row[0],"chrom"))
		continue;
	    bed = bedLoad3(row);
	    slAddHead(&bedRegionList, bed);
	    }
	lineFileClose(&lf);
	slReverse(&bedRegionList);
	}
    for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next)
	{
	if (inclChrom(cInfo->chrom))
	    {
	    int chromBitSize;
	    int chromSize = cInfo->size;
	    verbose(3,"chromFeatureBits(%s)\n", cInfo->chrom);
	    chromFeatureBits(conn, database, cInfo->chrom, tableCount, tables,
		bedFile, faFile, binFile, bedRegionList, bedRegionOutFile, 
		chromSize, &chromBitSize, pFirstTableBits, pSecondTableBits
		);
	    totalBases += countBases(conn, cInfo->chrom, chromSize, database);
	    totalBits += chromBitSize;
	    totalFirstBits += firstTableBits;
	    totalSecondBits += secondTableBits;
	    /* Print one dot every `dots` chromosomes as a progress indicator. */
	    if (dots > 0)
		{
		if (--dotClock <= 0)
		    {
		    fputc('.', stdout);
		    fflush(stdout);
		    dotClock = dots;
		    }
		}
	    }
	}
    if (dots > 0)
	{
	fputc('\n', stdout);
	fflush(stdout);
	}
    if (calcEnrichment)
        fprintf(stderr,"%s %5.3f%%, %s %5.3f%%, both %5.3f%%, cover %4.2f%%, enrich %4.2fx\n",
		tables[0], 
		100.0 * totalFirstBits/totalBases,
		tables[1],
		100.0 * totalSecondBits/totalBases,
		100.0 * totalBits/totalBases,
		100.0 * totalBits / totalFirstBits,
		(totalBits/totalSecondBits) / (totalFirstBits/totalBases) );
    else
	fprintf(stderr,"%1.0f bases of %1.0f (%4.3f%%) in intersection\n",
	    totalBits, totalBases, 100.0*totalBits/totalBases);
    }
else
    {
    /* Totals are accumulated here but not reported by this function. */
    int totalItems = 0;
    double totalBases = 0;
    int itemCount, baseCount;
    for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next)
        {
	if (inclChrom(cInfo->chrom))
	    {
	    chromFeatureSeq(conn, database, cInfo->chrom, tables[0],
		    bedFile, faFile, &itemCount, &baseCount);
	    totalBases += countBases(conn, cInfo->chrom, baseCount, database);
	    totalItems += itemCount;
	    }
	}
    }

/* Close every output file that was opened so buffered data is flushed and
 * write errors are detected; carefulClose ignores files that were never opened. */
carefulClose(&bedRegionOutFile);
carefulClose(&binFile);
carefulClose(&faFile);
carefulClose(&bedFile);
hFreeConn(&conn);
}
Exemple #12
0
int main(int argc, char *argv[])
/* Process command line.  Reads every filter option into its file-level
 * global, aborts on mutually exclusive combinations, then calls
 * pslCDnaFilter() on the two positional arguments (argv[1], argv[2]). */
{
optionInit(&argc, argv, optionSpecs);
if (optionExists("algoHelp"))
    prAlgo();
if (argc != 3)
    usage("wrong # of args");

gLocalNearBest = optionFrac("localNearBest", gLocalNearBest);
gGlobalNearBest = optionFrac("globalNearBest", gGlobalNearBest);
/* both being non-negative is treated as both options having been given */
if ((gLocalNearBest >= 0.0) && (gGlobalNearBest >= 0.0))
    errAbort("can only specify one of -localNearBest and -globalNearBest");

/* flag-style options that OR a bit into gCDnaOpts */
if (optionExists("usePolyTHead"))
    gCDnaOpts |= cDnaUsePolyTHead;
if (optionExists("ignoreNs"))
    gCDnaOpts |= cDnaIgnoreNs;
if (optionExists("ignoreIntrons"))
    gCDnaOpts |= cDnaIgnoreIntrons;
if (optionExists("repsAsMatch"))
    gCDnaOpts |= cDnaRepsAsMatch;

gMinId = optionFrac("minId", gMinId);
gMinCover = optionFrac("minCover", gMinCover);
gMinSpan = optionFrac("minSpan", gMinSpan);
gMinQSize = optionInt("minQSize", gMinQSize);
gMaxAligns = optionInt("maxAligns", gMaxAligns);
gMaxAlignsDrop = optionInt("maxAlignsDrop", gMaxAlignsDrop);
if ((gMaxAligns >= 0) && (gMaxAlignsDrop >= 0))
    errAbort("cannot specify both -maxAligns and -maxAlignsDrop");
gMinAlnSize = optionInt("minAlnSize", gMinAlnSize);
gMinNonRepSize = optionInt("minNonRepSize", gMinNonRepSize);
gMaxRepMatch = optionFrac("maxRepMatch", gMaxRepMatch);
gPolyASizes = optionVal("polyASizes", NULL);
if (optionExists("usePolyTHead") && (gPolyASizes == NULL))
    errAbort("must specify -polyASizes with -usePolyTHead");
gHapRegions = optionVal("hapRegions", NULL);
gBestOverlap = optionExists("bestOverlap");
gDropped = optionVal("dropped", NULL);
gWeirdOverlappped = optionVal("weirdOverlapped", NULL);
gFilterWeirdOverlapped = optionExists("filterWeirdOverlapped");
gHapRefMapped = optionVal("hapRefMapped", NULL);
gHapRefCDnaAlns = optionVal("hapRefCDnaAlns", NULL);
gHapLociAlns = optionVal("hapLociAlns", NULL);
if (optionExists("noValidate"))
    gValidate = FALSE;
cDnaAlignsAlnIdQNameMode = optionExists("alnIdQNameMode");
/* -ignoreNs was already folded into gCDnaOpts above; no second pass needed */
gUniqueMapped = optionExists("uniqueMapped");
gDecayMinCover = optionExists("decayMinCover");
char *blackList = optionVal("blackList", NULL);

if (blackList != NULL)
    gBlackListRanges = genbankBlackListParse(blackList);

/* the message names the options exactly as they are read above */
if ( gDecayMinCover && (gMinCover > 0.0))
    errAbort("can only specify one of -minCover and -decayMinCover");

pslCDnaFilter(argv[1], argv[2]);
return 0;
}
int main(int argc, char *argv[])
/* Process command line. */
{
boolean drop, move, copy;
struct sigaction sigSpec;
setlinebuf(stdout);
setlinebuf(stderr);

ZeroVar(&sigSpec);
sigSpec.sa_handler = sigStopSignaled;
sigSpec.sa_flags = SA_RESTART;
if (sigaction(SIGUSR1, &sigSpec, NULL) < 0)
    errnoAbort("can't set SIGUSR1 handler");

optionInit(&argc, argv, optionSpecs);
drop = optionExists("drop");
move = optionExists("move");
copy = optionExists("copy");
gReload = optionExists("reload");
if (move || copy) 
    {
    if (argc != 3)
        usage();
    }
else if (argc != 2)
    usage();
if ((drop+move+copy) > 1)
    errAbort("can only specify one of -drop, -move, or -copy");

gbVerbInit(optionInt("verbose", 0));
if (gbVerbose >= 6)
    sqlMonitorEnable(JKSQL_TRACE);
if (drop)
    dropAll(argv[1]);
else if (move)
    moveAll(argv[1], argv[2]);
else if (copy)
    copyAll(argv[1], argv[2]);
else
    {
    char *reloadList = optionVal("reloadList", NULL);
    gDatabase = argv[1];
    gOptions = dbLoadOptionsParse(gDatabase);
    gForceIgnoreDelete = optionExists("forceIgnoreDelete");
    if (optionExists("rebuildDerived"))
        gOptions.flags |= DBLOAD_BYPASS_GBLOADED|DBLOAD_REBUILD_DERIVED;

    gMaxShrinkage = optionFloat("maxShrinkage", 0.1);

    
    gGbdbGenBank = optionVal("gbdbGenBank", NULL);
    if (gGbdbGenBank == NULL)
        gGbdbGenBank = gbConfGet(gOptions.conf, "gbdb.genbank");
    if (gGbdbGenBank == NULL)
        gGbdbGenBank = "/gbdb/genbank";
    gWorkDir = optionVal("workdir", "work/load");

    if (gOptions.flags & DBLOAD_DRY_RUN)
        printf("*** using dry run mode ***\n");
    gbLoadRna(reloadList);
    }

return 0;
}
int main(int argc, char *argv[])
/* Process command line.  argv[1] is the PSL specification and argv[2] the
 * genePred output file.  Exactly one CDS source (-db, -cdsDb, -cdsFile,
 * -allCds or -noCds) must be given; option combinations that conflict
 * abort with an error before mrnaToGene() is called. */
{
    char *db, *cdsDb, *cdsFile, *pslSpec, *genePredFile;
    int optCnt;

    optionInit(&argc, argv, optionSpecs);
    if (argc != 3)
        usage();
    pslSpec = argv[1];
    genePredFile = argv[2];
    db = optionVal("db", NULL);
    cdsDb = optionVal("cdsDb", NULL);
    cdsFile = optionVal("cdsFile", NULL);
    gRequireUtr = optionExists("requireUtr");
    if (optionExists("cdsMergeMod3") && !optionExists("cdsMergeSize"))
        errAbort("must specify -cdsMergeSize with -cdsMergeMod3");
    if (optionExists("cdsMergeSize") || optionExists("utrMergeSize"))
    {
        gCdsMergeSize = optionInt("cdsMergeSize", -1);
        gUtrMergeSize = optionInt("utrMergeSize", -1);
        if (optionExists("cdsMergeMod3"))
            gPslOptions |= genePredPslCdsMod3;
        if (optionExists("smallInsertSize") || optionExists("insertMergeSize"))
            errAbort("can't specify -smallInsertSize or -insertMergeSize with -cdsMergeSize or -utrMergeSize");
    }
    else
    {
        /* -smallInsertSize sets the merge size; -insertMergeSize, when given,
         * overrides it.  The second read defaults to the value chosen so far
         * so that -smallInsertSize alone is not discarded. */
        int insertMergeSize = genePredStdInsertMergeSize;
        if (optionExists("smallInsertSize"))
            insertMergeSize = optionInt("smallInsertSize", genePredStdInsertMergeSize);
        insertMergeSize = optionInt("insertMergeSize", insertMergeSize);
        gCdsMergeSize = gUtrMergeSize = insertMergeSize;
    }
    gGenePredExt = optionExists("genePredExt");
    gKeepInvalid = optionExists("keepInvalid");
    gAllCds = optionExists("allCds");
    gNoCds = optionExists("noCds");
    gQuiet = optionExists("quiet");
    gIgnoreUniqSuffix = optionExists("ignoreUniqSuffix");

    if ((gAllCds || gNoCds) && ((cdsDb != NULL) || (cdsFile != NULL)))
        errAbort("can't specify -allCds or -noCds with -cdsDb or -cdsFile");
    if (gAllCds && gRequireUtr)
        errAbort("can't specify -allCds with -requireUtr");
    /* this is a bit of work to implement */
    if ((gAllCds || gNoCds) && (db != NULL))
        errAbort("can't specify -allCds or -noCds with -db");

    /* count how many CDS sources were actually supplied */
    optCnt = 0;
    if (db != NULL)
        optCnt++;
    if (cdsDb != NULL)
        optCnt++;
    if (cdsFile != NULL)
        optCnt++;
    if (gAllCds)
        optCnt++;
    if (gNoCds)
        optCnt++;

    if (optCnt != 1)
        errAbort("must specify one and only one of -db, -cdsDb, -cdsFile, -allCds, or -noCds");

    mrnaToGene(db, cdsDb, cdsFile, pslSpec, genePredFile);
    return 0;
}
Exemple #15
0
int main(int argc, char *argv[])
/* Entry point: copy command-line options into the file-level settings,
 * apply the customTrackLoader overrides, choose where chromosome
 * information comes from, then call hgLoadBed() with the database
 * (argv[1]), track (argv[2]) and the remaining arguments. */
{
char *chromInfoFile;

optionInit(&argc, argv, optionSpecs);
if (!(argc >= 4))
    usage();

/* table layout and loading behaviour */
noBin = optionExists("noBin") || optionExists("nobin");
noSort = optionExists("noSort");
strictTab = optionExists("tab");
oldTable = optionExists("oldTable");
sqlTable = optionVal("sqlTable", sqlTable);
renameSqlTable = optionExists("renameSqlTable");
trimSqlTable = optionExists("trimSqlTable");
as = optionVal("as", as);
type = optionVal("type", type);
hasBin = optionExists("hasBin");
noLoad = optionExists("noLoad");
noHistory = optionExists("noHistory");
bedGraph = optionInt("bedGraph",0);
bedDetail = optionExists("bedDetail");

/* score handling */
minScore = optionInt("minScore",100);
if (!(minScore >= 0 && minScore <= 1000))
    errAbort("minScore must be between 0-1000\n");

notItemRgb = optionExists("notItemRgb");
if (notItemRgb)
    itemRgb = FALSE;
maxChromNameLength = optionInt("maxChromNameLength",0);
dotIsNull = optionInt("dotIsNull",dotIsNull);
noStrict = optionExists("noStrict") || optionExists("nostrict");
allowStartEqualEnd = optionExists("allowStartEqualEnd");
tmpDir = optionVal("tmpDir", tmpDir);
nameIx = ! optionExists("noNameIx");
ignoreEmpty = optionExists("ignoreEmpty");
allowNegativeScores = optionExists("allowNegativeScores");
customTrackLoader = optionExists("customTrackLoader");
parseType();

/* -customTrackLoader forces: noNameIx, ignoreEmpty, allowStartEqualEnd,
 * allowNegativeScores, noHistory and -verbose=0, and arms a self-destruct
 * alarm so the process cannot outlive the CGI timeout. */
if (customTrackLoader)
    {
    type = NULL;   /* because customTrack/Factory has already validated the input */
    ignoreEmpty = TRUE;
    noHistory = TRUE;
    nameIx = FALSE;
    allowStartEqualEnd = TRUE;
    allowNegativeScores = TRUE;
    verboseSetLevel(0);
    expireSeconds = 1200;	/* 20 minutes */
    (void) signal(SIGALRM, selfApoptosis);
    (void) alarm(expireSeconds);	/* CGI timeout */
    }
fillInScoreColumn = optionVal("fillInScore", NULL);

/* chromosome information: from -chromInfo file if given, otherwise from
 * the database, and only when a type is set */
chromInfoFile = optionVal("chromInfo", NULL);
if (chromInfoFile != NULL)
    {
    if (type == NULL)
	errAbort("Only use chromInfo with type for validate");
    chrHash = chromHashFromFile(chromInfoFile);
    }
else if (type != NULL)
    {
    chrHash = chromHashFromDatabase(argv[1]);
    }

hgLoadBed(argv[1], argv[2], argc-3, argv+3);
return 0;
}
Exemple #16
0
int main(int argc, char *argv[])
/* pslAnal entry point.  Positional arguments: isPCR psl file (argv[1]),
 * all.primers file (argv[2]) and output file (argv[3]).  Writes the
 * processed primers to the output file, primers not found to
 * "<outfile>.notfound.primers" and, when -epcr is given, epcr entries not
 * found to "epcr.not.found".  Returns 1 on a usage error, 0 otherwise. */
{
  struct lineFile *pf, *ef = NULL, *apf;
  FILE *of, *nf, *enf=NULL;
  char *efName=NULL, filename[256];
  int verb = 0;
  int written;

verboseSetLevel(0);
optionInit(&argc, argv, optionSpecs);
/* three positional arguments are used below, so argv[3] must exist */
if (argc < 4)
    {
      fprintf(stderr, "USAGE: pslAnal [-epcr=<file> -verbose=<level>] <isPCR psl file> <all.primers> <outfile>\n");
    return 1;
    }
verb = optionInt("verbose", 0);
verboseSetLevel(verb);

 efName = optionVal("epcr", NULL);
 pf = pslFileOpen(argv[1]);
 apf = lineFileOpen(argv[2], TRUE);

 of = mustOpen(argv[3], "w");
 /* bounded formatting: refuse an output name that does not fit the buffer */
 written = snprintf(filename, sizeof(filename), "%s.notfound.primers", argv[3]);
 if (written < 0 || (size_t)written >= sizeof(filename))
    {
      fprintf(stderr, "output file name too long: %s\n", argv[3]);
    return 1;
    }
 nf = mustOpen(filename, "w");

 verbose(1, "Reading all.primers file\n");
 readPrimerInfo(apf);

 if (efName)
   {
     ef = lineFileOpen(efName, TRUE);
     verbose(1, "Reading epcr file\n");
     readEpcr(ef);
   }

 verbose(1, "Reading and processing isPCR file\n");
 processPrimers(pf, of);

 if (efName)
   {
     verbose(1, "Writing epcr.not.found file\n");
     snprintf(filename, sizeof(filename), "epcr.not.found");
     enf = mustOpen(filename, "w");
     writeEpcrNotFound(enf);
   }

 verbose(1, "Writing out primers not found\n");
 writePrimersNotFound(nf);

 if (efName)
   {
     lineFileClose(&ef);
     fclose(enf);
   }
 lineFileClose(&pf);
 lineFileClose(&apf);
 fclose(of);
 fclose(nf);

 return(0);
}
int main(int argc, char *argv[])
/* Entry point.  argv[1] is the command word (matched with sameWord); an
 * optional argv[2] is a directory to switch into; the remaining arguments
 * depend on the command.  Any argument-count mismatch or unknown command
 * goes to usage(). */
{
struct sqlConnection *dbConn = NULL;
char *cmd = NULL;

optionInit(&argc, argv, options);
database = optionVal("database", database);
sqlPath = optionVal("sqlPath", sqlPath);
if (argc < 2)
    usage();
cmd = argv[1];
if (argc >= 3)
    setCurrentDir(argv[2]);
dbConn = sqlConnect(database);

if (sameWord(cmd,"INIT"))
    {
    /* deliberately disabled: aborts rather than calling init(dbConn) */
    if (argc != 2)
	usage();
    errAbort("INIT is probably too dangerous. DO NOT USE.");
    }
else if (sameWord(cmd,"POP"))
    {
    /* fill in vgPrb where it is missing */
    if (argc != 2)
	usage();
    populateMissingVgPrb(dbConn);
    }
else if (sameWord(cmd,"SEQ"))
    {
    /* build fake probe sequences */
    if (argc != 4)
	usage();
    makeFakeProbeSeq(dbConn,argv[3]);
    }
else if (sameWord(cmd,"ALI"))
    {
    /* blat whatever is neither aligned nor attempted yet */
    if (argc != 4)
	usage();
    doAlignments(dbConn,argv[3]);
    }
else if (sameWord(cmd,"EXT"))
    {
    /* bring seq and extfile up to date for vgProbes */
    if (argc != 4)
	usage();
    doSeqAndExtFile(dbConn,argv[3],"vgProbes");
    }
else if (sameWord(cmd,"PSLMAP"))
    {
    /* pslMap whatever is neither aligned nor attempted yet */
    if (argc != 5)
	usage();
    doAlignmentsPslMap(dbConn,argv[3],argv[4]);
    }
else if (sameWord(cmd,"REMAP"))
    {
    /* re-map entries of the given track that are neither aligned nor
     * attempted yet, using the given fasta file */
    if (argc != 7)
	usage();
    doAlignmentsReMap(dbConn,argv[3],argv[4],argv[5],argv[6]);
    }
else if (sameWord(cmd,"SELFMAP"))
    {
    /* self-map variant; takes a single extra argument */
    if (argc != 4)
	usage();
    doAlignmentsSelfMap(dbConn,argv[3]);
    }
else if (sameWord(cmd,"EXTALL"))
    {
    /* bring seq and extfile up to date for vgAllProbes */
    if (argc != 4)
	usage();
    doSeqAndExtFile(dbConn,argv[3],"vgAllProbes");
    }
else
    {
    usage();
    }

sqlDisconnect(&dbConn);
return 0;
}
int main(int argc, char *argv[])
/* Process command line.  Validates -motif and -strand, packs the motif
 * and its reverse complement into the file-level motifVal/complementVal
 * (two bits per base), then calls findMotif() on argv[1]. */
{
int i;
char *cp;
unsigned long long reversed;
size_t maxAlloc;
char asciiAlloc[32];

optionInit(&argc, argv, options);

if (argc < 2)
    usage();

/* scale the allocation ceiling by (sizeof(size_t)/4) cubed */
maxAlloc = 2100000000 *
	 (((sizeof(size_t)/4)*(sizeof(size_t)/4)*(sizeof(size_t)/4)));
sprintLongWithCommas(asciiAlloc, (long long) maxAlloc);
verbose(4, "#\tmaxAlloc: %s\n", asciiAlloc);
setMaxAlloc(maxAlloc);
/* produces: size_t is 4 == 2100000000 ~= 2^31 = 2Gb
 *      size_t is 8 = 16800000000 ~= 2^34 = 16 Gb
 */

dnaUtilOpen();

motif = optionVal("motif", NULL);
chr = optionVal("chr", NULL);
strand = optionVal("strand", NULL);
bedOutput = optionExists("bedOutput");
wigOutput = optionExists("wigOutput");

/* the -bedOutput value read above is overridden here: bed output is on
 * exactly when -wigOutput is absent */
if (wigOutput)
    bedOutput = FALSE;
else
    bedOutput = TRUE;

if (chr)
    verbose(2, "#\tprocessing chr: %s\n", chr);
if (strand)
    verbose(2, "#\tprocessing strand: '%s'\n", strand);
if (motif)
    verbose(2, "#\tsearching for motif: %s\n", motif);
else {
    warn("ERROR: -motif string empty, please specify a motif\n");
    usage();
}
verbose(2, "#\ttype output: %s\n", wigOutput ? "wiggle data" : "bed format");
verbose(2, "#\tspecified sequence: %s\n", argv[1]);
verbose(2, "#\tsizeof(motifVal): %d\n", (int)sizeof(motifVal));
if (strand)
    {
    /* NOTE(review): bitwise | is used between the two comparisons here;
     * presumably equivalent to || for these operands -- confirm */
    if (! (sameString(strand,"+") | sameString(strand,"-")))
	{
	warn("ERROR: -strand specified ('%s') is not + or - ?\n", strand);
	usage();
	}
    /*	They are both on by default, turn off the one not specified */
    if (sameString(strand,"-"))
	doPlusStrand = FALSE;
    if (sameString(strand,"+"))
	doMinusStrand = FALSE;
    }
/* NOTE(review): relies on usage() not returning when motif is NULL */
motifLen = strlen(motif);
/*	at two bits per character, size limit of motif is
 *	number of bits in motifVal / 2
 */
/* NOTE(review): the expression below evaluates to 2*sizeof(motifVal),
 * whereas "bits / 2" would be 4*sizeof(motifVal) -- confirm which limit
 * is intended before changing either the code or the comment above */
if (motifLen > (4*sizeof(motifVal))/2 )
    {
    warn("ERROR: motif string too long, limit %d\n", (4*(int)sizeof(motifVal))/2 );
    usage();
    }
/* pack the motif left to right, two bits per base; complementVal receives
 * the complementary base at the same position */
cp = motif;
motifVal = 0;
complementVal = 0;
for (i = 0; i < motifLen; ++i)
    {
	switch (*cp)
	{
	case 'a':
	case 'A':
	    motifVal = (motifVal << 2) | A_BASE_VAL;
	    complementVal = (complementVal << 2) | T_BASE_VAL;
	    break;
	case 'c':
	case 'C':
	    motifVal = (motifVal << 2) | C_BASE_VAL;
	    complementVal = (complementVal << 2) | G_BASE_VAL;
	    break;
	case 'g':
	case 'G':
	    motifVal = (motifVal << 2) | G_BASE_VAL;
	    complementVal = (complementVal << 2) | C_BASE_VAL;
	    break;
	case 't':
	case 'T':
	    motifVal = (motifVal << 2) | T_BASE_VAL;
	    complementVal = (complementVal << 2) | A_BASE_VAL;
	    break;
	default:
	    warn(
		"ERROR: character in motif: '%c' is not one of ACGT\n", *cp);
	    usage();
	}
	++cp;
    }
/* reverse the order of the 2-bit groups so that complementVal holds the
 * reverse complement of the motif */
reversed = 0;
for (i = 0; i < motifLen; ++i)
    {
    int base;
    base = complementVal & 3;
    reversed = (reversed << 2) | base;
    complementVal >>= 2;
    }
complementVal = reversed;
verbose(2, "#\tmotif numerical value: %llu (%#llx)\n", motifVal, motifVal);
verbose(2, "#\tcomplement numerical value: %llu (%#llx)\n", complementVal, complementVal);
/* the minimum-length check runs only after the values have been reported */
if (motifLen < 5)
    {
    warn("ERROR: motif string must be more than 4 characters\n");
    usage();
    }

findMotif(argv[1]);
return 0;
}
Exemple #19
0
/* entry */
int main(int argc, char** argv)
/* Parse the command line for overlapSelect.  Positional arguments are the
 * select file (argv[1]), input file (argv[2]) and output file (argv[3]).
 * File formats come from -selectFmt/-inFmt, from coordinate-column specs,
 * or are derived from the file name.  Conflicting options abort with an
 * error before overlapSelect() is called. */
{
char *selectFile, *inFile, *outFile, *dropFile;
optionInit(&argc, argv, optionSpecs);
if (argc != 4)
    usage("wrong # args");
selectFile = argv[1];
inFile = argv[2];
outFile = argv[3];

/* select file options */
if (optionExists("selectFmt") && optionExists("selectCoordCols"))
    errAbort("can't specify both -selectFmt and -selectCoordCols");

if (optionExists("selectFmt"))
    selectFmt = parseFormatSpec(optionVal("selectFmt", NULL));
else if (optionExists("selectCoordCols"))
    {
    selectCoordCols = coordColsParseSpec("selectCoordCols",
                                         optionVal("selectCoordCols", NULL));
    selectFmt = COORD_COLS_FMT;
    }
else
    selectFmt = getFileFormat(selectFile);

if (optionExists("selectCds"))
    selectCaOpts |= chromAnnCds;
if (optionExists("selectRange"))
    selectCaOpts |= chromAnnRange;
if ((selectFmt == PSLQ_FMT) || (selectFmt == CHAINQ_FMT))
    selectCaOpts |= chromAnnUseQSide;

/* in file options */
if (optionExists("inFmt") && optionExists("inCoordCols"))
    errAbort("can't specify both -inFmt and -inCoordCols");
if (optionExists("inFmt"))
    inFmt = parseFormatSpec(optionVal("inFmt", NULL));
else if (optionExists("inCoordCols"))
    {
    inCoordCols = coordColsParseSpec("inCoordCols",
                                     optionVal("inCoordCols", NULL));
    inFmt = COORD_COLS_FMT;
    }
else
    inFmt = getFileFormat(inFile);

inCaOpts = chromAnnSaveLines; // need lines for output
if (optionExists("inCds"))
    inCaOpts |= chromAnnCds;
if (optionExists("inRange"))
    inCaOpts |= chromAnnRange;
if ((inFmt == PSLQ_FMT) || (inFmt == CHAINQ_FMT))
    inCaOpts |= chromAnnUseQSide;

/* select options */
useAggregate = optionExists("aggregate");
nonOverlapping = optionExists("nonOverlapping");
if (optionExists("strand") && optionExists("oppositeStrand"))
    errAbort("can only specify one of -strand and -oppositeStrand");
if (optionExists("strand"))
    selectOpts |= selStrand;
if (optionExists("oppositeStrand"))
    selectOpts |= selOppositeStrand;
if (optionExists("excludeSelf") && (optionExists("idMatch")))
    errAbort("can't specify both -excludeSelf and -idMatch");
if (optionExists("excludeSelf"))
    selectOpts |= selExcludeSelf;
if (optionExists("idMatch"))
    selectOpts |= selIdMatch;

criteria.threshold = optionFloat("overlapThreshold", 0.0);
criteria.thresholdCeil = optionFloat("overlapThresholdCeil", 1.1);
criteria.similarity = optionFloat("overlapSimilarity", 0.0);
criteria.similarityCeil = optionFloat("overlapSimilarityCeil", 1.1);
criteria.bases = optionInt("overlapBases", -1);

/* output options */
mergeOutput = optionExists("mergeOutput");
idOutput = optionExists("idOutput");
statsOutput = optionExists("statsOutput") || optionExists("statsOutputAll") || optionExists("statsOutputBoth");
if ((mergeOutput + idOutput + statsOutput) > 1)
    errAbort("can only specify one of -mergeOutput, -idOutput, -statsOutput, -statsOutputAll, or -statsOutputBoth");
outputAll = optionExists("statsOutputAll");
outputBoth = optionExists("statsOutputBoth");
if (outputBoth)
    outputAll = TRUE;
if (mergeOutput)
    {
    if (nonOverlapping)
        errAbort("can't use -mergeOutput with -nonOverlapping");
    if (useAggregate)
        errAbort("can't use -mergeOutput with -aggregate");
    /* The chain check used to be nested under a second useAggregate test,
     * which can never be true at this point, so it never fired. */
    if ((selectFmt == CHAIN_FMT) || (selectFmt == CHAINQ_FMT)
        || (inFmt == CHAIN_FMT) || (inFmt == CHAINQ_FMT))
        errAbort("can't use -mergeOutput with chains");
    selectCaOpts |= chromAnnSaveLines;
    }
dropFile = optionVal("dropped", NULL);

/* check for options incompatible with aggregate mode */
if (useAggregate)
    {
    int i;
    for (i = 0; aggIncompatible[i] != NULL; i++)
        {
        if (optionExists(aggIncompatible[i]))
            errAbort("-%s is not allowed with -aggregate", aggIncompatible[i]);
        }
    }

overlapSelect(selectFile, inFile, outFile, dropFile);
return 0;
}
Exemple #20
0
int main(int argc, char *argv[])
/* Process command line.  argv[1] is the database and argv[2] the composite
 * track.  Builds default metaDb, trackDb and download-directory paths from
 * the source tree, database, organism, release and composite; lets
 * -metaDb, -trackDb and -downloadDir override them; verifies each path
 * exists; then calls metaCheck(). */
{
optionInit(&argc, argv, options);
if (optionExists("help"))
    {
    printHelp();
    }

if (argc != 3)
    {
    usage();
    }
outMdb = optionVal("outMdb", outMdb);
onlyCompTdb = optionExists("onlyCompTdb");
release = optionVal("release", release);
releaseNum = optionInt("releaseNum", releaseNum);

char *database = argv[1];
char *composite = argv[2];


char defaultMetaDb[1024];
char defaultDownloadDir[1024];
char tempDownloadDir[1024];
char *src = getSrcDir();
char *org = cloneString(hOrganism(database));
/* NOTE(review): guards against a NULL organism (presumably returned for a
 * database that is not known) instead of dereferencing it -- confirm */
if (org == NULL)
    errAbort("unable to determine organism for database %s", database);
/* the organism name is used as a directory name with a lower-case first letter */
org[0] = tolower((unsigned char)org[0]);
/* If user doesn't provide a metaDB, assume the path using the database and composite  */
safef(defaultMetaDb, sizeof(defaultMetaDb), "%s/hg/makeDb/trackDb/%s/%s/metaDb/%s/%s.ra", src, org, database, release, composite);
/* If user doesn't provide a downloadDir, assume the path using the database and composite;
 * a non-zero releaseNum selects the release<N> subdirectory */
safef(tempDownloadDir, sizeof(tempDownloadDir), "/usr/local/apache/htdocs-hgdownload/goldenPath/%s/encodeDCC/%s", database, composite);
if (releaseNum)
    safef(defaultDownloadDir, sizeof(defaultDownloadDir), "%s/release%d", tempDownloadDir, releaseNum);
else
    safef(defaultDownloadDir, sizeof(defaultDownloadDir), "%s", tempDownloadDir);

/* If user doesn't provide a trackDB, assume the path using the database and composite  */
char defaultTrackDb[1024];
/* Load encode composite-includer trackDb.wgEncode.ra */
char trackDbIncluder[1024];
safef(trackDbIncluder, sizeof(trackDbIncluder), "%s/hg/makeDb/trackDb/%s/%s/%s", src, org, database, "trackDb.wgEncode.ra");
struct raFile *includerFile = raFileRead(trackDbIncluder);
/* Find the correct trackDb.ra for the composite */
int numTagsFound = -1;
char *compositeName = findCompositeRa(includerFile, composite, release, &numTagsFound);
if (!compositeName)
    errAbort("unable to find composite .ra for the track in trackDb.wgEncode.ra\n");
// if numTagsFound == 1 then a composite .ra with a single alpha tag exists already,
//  so no further work required on trackDb.wgEncode.ra
safef(defaultTrackDb, sizeof(defaultTrackDb), "%s/hg/makeDb/trackDb/%s/%s/%s", src, org, database, compositeName);

verbose(1,"database: %s\ncomposite: %s\nrelease %s\ndefault trackDb: %s\ndefault metaDb: %s\ndefault downloadDir: %s\n",
    database, composite, release, defaultTrackDb, defaultMetaDb, defaultDownloadDir);

/* each path may be overridden on the command line and must exist */
char *metaDb = optionVal("metaDb",defaultMetaDb);
replaceTildeWithHome(&metaDb);
if (!fileExists(metaDb))
    errAbort("metaDb %s does not exist.", metaDb);

char *trackDb = optionVal("trackDb",defaultTrackDb);
replaceTildeWithHome(&trackDb);
if (!fileExists(trackDb))
    errAbort("trackDb %s does not exist.", trackDb);

char *downloadDir = optionVal("downloadDir",defaultDownloadDir);
replaceTildeWithHome(&downloadDir);
if (!fileExists(downloadDir))
    errAbort("downloadDir %s does not exist.", downloadDir);


printf("metaDb = %s\n trackDb = %s\n downloadDir = %s\n",metaDb,trackDb,downloadDir);

metaCheck(database, composite, metaDb, trackDb, downloadDir);

return 0;
}
int main(int argc, char *argv[])
/* pslFilterPrimers entry point.  Positional arguments: isPCR psl file
 * (argv[1]), all.primers file (argv[2]) and output file (argv[3]).
 * Writes processed primers to the output file, primers not found to
 * "<outfile>.notfound.primers" and, when -epcr is given, epcr entries not
 * found to "epcr.not.found".  Returns 1 on a usage error, 0 otherwise. */
{
  struct lineFile *pf, *ef = NULL, *apf;
  FILE *of, *nf, *enf=NULL;
  char *efName=NULL, filename[256], notFound[256];
  int verb = 0;

verboseSetLevel(0);
optionInit(&argc, argv, optionSpecs);
/* three positional arguments are used below, so argv[3] must exist */
if (argc < 4)
    {
      verbose(0, "usage: pslFilterPrimers [-epcr=<file> -verbose=<level>] <isPCR psl file> <all.primers> <outfile>\n");
    return 1;
    }
verb = optionInt("verbose", 0);
verboseSetLevel(verb);

 efName = optionVal("epcr", NULL);
 pf = pslFileOpen(argv[1]);
 apf = lineFileOpen(argv[2], TRUE);

 of = mustOpen(argv[3], "w");
 /* size the write by the destination buffer, not by its neighbour */
 safef(notFound, sizeof(notFound), "%s.notfound.primers", argv[3]);
 nf = mustOpen(notFound, "w");

 verbose(1, "Reading all primers file: '%s'\n", argv[2]);
 readPrimerInfo(apf);

 if (efName)
   {
     ef = lineFileOpen(efName, TRUE);
     verbose(1, "Reading epcr file: '%s'\n", efName);
     readEpcr(ef);
   }

 verbose(1, "Reading isPCR file: '%s' processing output to: '%s'\n", argv[1], argv[3]);
 processPrimers(pf, of);

 if (efName)
   {
     safef(filename, sizeof(filename), "epcr.not.found");
     verbose(1, "Writing %s file\n", filename);
     enf = mustOpen(filename, "w");
     writeEpcrNotFound(enf);
   }

 verbose(1, "Writing primers not found to file: '%s'\n", notFound);
 writePrimersNotFound(nf);

 if (efName)
   {
     lineFileClose(&ef);
     fclose(enf);
   }
 lineFileClose(&pf);
 lineFileClose(&apf);
 fclose(of);
 fclose(nf);

 return(0);
}
void splitByGap(char *inName, int pieceSize, char *outRoot, long long estSize)
/* Split up file into pieces at most pieceSize bases long, at gap boundaries
 * if possible.
 *   inName    - input sequence file, read one record at a time
 *   pieceSize - upper bound on the size of an ordinary piece
 *   outRoot   - root of the output name(s); with -oneFile, "<outRoot>.fa"
 *   estSize   - estimated total size, used only to pick how many digits
 *               the piece number needs
 * Options read here: minGapSize, noGapDrops, maxN, oneFile, out, lift. */
{
off_t pieces = (estSize + pieceSize-1)/pieceSize;
int digits = digitsBaseTen(pieces);
int minGapSize = optionInt("minGapSize", 1000);
boolean noGapDrops = optionExists("noGapDrops");
int maxN = optionInt("maxN", pieceSize-1);
boolean oneFile = optionExists("oneFile");
char fileName[512];
char dirOnly[256], noPath[128];
int pos, pieceIx = 0, writeCount = 0;
struct dnaSeq seq;
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = NULL;
Bits *bits = NULL;
int seqCount = 0;
char *outFile = optionVal("out", NULL);
char *liftFile = optionVal("lift", NULL);
FILE *lift = NULL;
ZeroVar(&seq);

if (minGapSize < 1)
    errAbort("ERROR: minGapSize must be > 0");

splitPath(outRoot, dirOnly, noPath, NULL);
/* with -oneFile a single output is opened up front; otherwise fileName
 * starts empty and is handed to writeOneByGap by reference */
if (oneFile)
    {
    sprintf(fileName, "%s.fa", outRoot);
    f = mustOpen(fileName, "w");
    }
else
    fileName[0] = '\0';
if (liftFile)
    lift = mustOpen(liftFile, "w");

while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    /* per-sequence bitmap filled in by setBitsN; presumably one bit per
     * base that is set where the base is N -- confirm in setBitsN */
    bits = bitAlloc(seq.size);
    setBitsN(seq.dna, seq.size, bits);
    ++seqCount;
    if (outFile != NULL)
        {
	if (seqCount > 1)
	    errAbort("Can only handle in files with one sequence using out option");
	bitsForOut(outFile, seq.size, bits);
	}
    pos = 0;
    while (pos < seq.size)
        {
	boolean gotGap = FALSE;
	int gapStart = 0;
	int gapSize  = 0;
	int endSize  = seq.size - pos;	/* bases remaining from pos */
	int thisSize = min(endSize, pieceSize);
	int startGapLen = 0;

	if (seq.dna[pos] == 'n' || seq.dna[pos] == 'N')
	    {
	    /* NOTE(review): endSize (bases remaining) is passed as the third
	     * argument; confirm whether bitFindClear expects the total bit
	     * count (seq.size) here, which would differ once pos > 0 */
	    startGapLen = bitFindClear(bits, pos, endSize) - pos;
	    verbose(3,"#\tstarting gap at %d for length: %d\n", pos,
		startGapLen );
	    }
	/*	if a block is all gap for longer than minGapSize, then
 	 *	keep it all together in one large piece
	 */
	if (startGapLen > minGapSize)
	    {
	    if (noGapDrops)
		{
		writeOneByGap(oneFile, outRoot, digits, &pieceIx,
		    f, noPath, pos, startGapLen, &seq, lift,
			&writeCount, fileName);
		}
	    else
		verbose(3,"#\tbeginning gap of %d size skipped\n", startGapLen);
	    /* advance past the whole leading gap whether or not it was written */
	    thisSize = startGapLen;
	    }
	/* ordinary piece: written only when its bit count is within maxN;
	 * otherwise it is skipped but pos still advances by thisSize */
	else if (thisSize > 0 && bitCountRange(bits, pos, thisSize) <= maxN)
	    {
	    if (endSize>pieceSize) /* otherwise chops tiny piece at very end */
	      {
		gotGap = findLastGap(&(seq.dna[pos]), thisSize, endSize,
				     minGapSize, &gapStart, &gapSize);
		if (gotGap)
		  thisSize = gapStart;
	      }
	    writeOneByGap(oneFile, outRoot, digits, &pieceIx,
		f, noPath, pos, thisSize, &seq, lift, &writeCount, fileName);
	    }
	pos += thisSize;
	/* a gap found by findLastGap is either written as its own piece
	 * (-noGapDrops) or dropped; pos moves past it either way */
	if (gotGap)
	    {
	    /*	last block is all gap, write it all out	*/
	    /*if ((pos + gapSize) >= seq.size)*/
	    if (noGapDrops)
		{
		writeOneByGap(oneFile, outRoot, digits, &pieceIx,
		    f, noPath, pos, gapSize, &seq ,lift, &writeCount, fileName);
		verbose(3,
		    "#\tadding gapSize %d to pos %d -> %d and writing gap\n",
			gapSize, pos, pos+gapSize);
		}
	    else
		verbose(3,"#\tadding gapSize %d to pos %d -> %d\n",
			gapSize, pos, pos+gapSize);
	    pos += gapSize;
	    }
	}
    bitFree(&bits);
    }
carefulClose(&f);
carefulClose(&lift);
lineFileClose(&lf);
printf("%d pieces of %d written\n", writeCount, pieceIx);
}
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 *   doLoad   - when FALSE only the tab file is written; nothing is loaded
 *   db       - database to load into
 *   track    - table name, also the base name of the .tab and .cgb files
 *   fileName - input chromGraph file
 * Options read here: idTable, minusLog10, pathPrefix.
 * Aborts if the input yields no elements. */
{
    double minVal,maxVal;
    struct chromGraph *el, *list;
    FILE *f;
    char *tempDir = ".";
    char path[PATH_LEN], gbdbPath[PATH_LEN];
    char *idTable = optionVal("idTable", NULL);
    char *pathPrefix = NULL;
    boolean minusLog10 = optionExists("minusLog10");

    if (idTable == NULL)
        list = chromGraphLoadAll(fileName);
    else
        list = chromGraphListWithTable(fileName, db, idTable);
    if (list == NULL)
        errAbort("%s is empty", fileName);

    /* Apply the optional -log10 transform to every element, head included,
     * before min/max are taken; previously the head was skipped and its raw
     * value seeded min/max.  Values <= 0 are left as they are. */
    if (minusLog10)
    {
        for (el = list; el != NULL; el = el->next)
        {
            if (el->val == 1)
                el->val = 0;
            else if (el->val > 0)
                el->val = -1 * log(el->val)/log(10);
        }
    }

    /* Figure out min/max values */
    minVal = maxVal = list->val;
    for (el = list->next; el != NULL; el = el->next)
    {
        if (el->val < minVal)
            minVal = el->val;
        if (el->val > maxVal)
            maxVal = el->val;
    }


    /* Sort and write out temp file. */
    slSort(&list, chromGraphCmp);
    f = hgCreateTabFile(tempDir, track);
    for (el = list; el != NULL; el = el->next)
        chromGraphTabOut(el, f);

    /* NOTE(review): when doLoad is FALSE, f is not closed in this function */
    if (doLoad)
    {
        struct dyString *dy = dyStringNew(0);
        struct sqlConnection *conn;

        /* Set up connection to database and create main table. */
        conn = hAllocConn(db);
        sqlDyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
        sqlRemakeTable(conn, track, dy->string);

        /* Load main table and clean up file handle. */
        hgLoadTabFile(conn, tempDir, track, &f);
        hgRemoveTabFile(tempDir, track);

        /* If need be create meta table.  If need be delete old row. */
        if (!sqlTableExists(conn, "metaChromGraph"))
            sqlUpdate(conn, metaCreateString);
        else
        {
            dyStringClear(dy);
            sqlDyStringPrintf(dy, "delete from metaChromGraph where name = '%s'",
                              track);
            sqlUpdate(conn, dy->string);
        }

        /* Make chrom graph file; path is then reused for the default
         * gbdb directory, which -pathPrefix can override */
        safef(path, sizeof(path), "%s.cgb", track);
        chromGraphToBin(list, path);
        safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
        pathPrefix = optionVal("pathPrefix", path);
        safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

        /* Create new line in meta table */
        dyStringClear(dy);
        sqlDyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
                          track, minVal, maxVal, gbdbPath);
        sqlUpdate(conn, dy->string);
    }
}
Exemple #24
0
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable)
/* hgExpDistance - Create table that measures expression distance between pairs.
 *   database - database holding posTable and receiving outTable
 *   posTable - table read for name, expCount and expScores
 *   expTable - not used in this function
 *   outTable - table created and loaded from <outTable>.tab
 * Options read here: lookup, targetIndex.
 * Aborts when a row's expCount disagrees with its score list or with the
 * other rows, or when no genes remain. */
{
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char query[256];
char **row;
struct hash *expHash = hashNew(16);
int realExpCount = -1;
struct microData *geneList = NULL, *curGene, *gene;
int geneIx, geneCount = 0;
struct microData **geneArray = NULL;
float *weights = NULL;
char *tempDir = ".";
FILE *f = hgCreateTabFile(tempDir, outTable);
long time1, time2;

time1 = clock1000();

/* Get list/hash of all items with expression values. */

/* uglyf("warning: temporarily limited to 1000 records\n"); */

sqlSafef(query, sizeof(query), "select name,expCount,expScores from %s", posTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *name = row[0];
    /* only the first row seen for each name is kept */
    if (!hashLookup(expHash, name))
        {
        int expCount = sqlUnsigned(row[1]);
        int commaCount;
        float *expScores = NULL;

        sqlFloatDynamicArray(row[2], &expScores, &commaCount);
        if (expCount != commaCount)
            errAbort("expCount and expScores don't match on %s in %s", name, posTable);
        if (realExpCount == -1)
            realExpCount = expCount;
        /* report the table here: the message describes rows of a table */
        if (expCount != realExpCount)
            errAbort("In %s some rows have %d experiments others %d",
                posTable, expCount, realExpCount);
        AllocVar(gene);
        gene->expCount = expCount;
        gene->expScores = expScores;
        hashAddSaveName(expHash, name, gene, &gene->name);
        slAddHead(&geneList, gene);
        }
    }
sqlFreeResult(&sr);
/* keep using the connection opened above; reconnecting here overwrote conn
 * without disconnecting it first, leaking one connection per call */
slReverse(&geneList);
geneCount = slCount(geneList);
printf("Have %d elements in %s\n", geneCount, posTable);

weights = getWeights(realExpCount);

if (optionExists("lookup"))
    geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList);
geneCount = slCount(geneList);
printf("Got %d unique elements in %s\n", geneCount, posTable);

sqlDisconnect(&conn);	/* Disconnect because next step is slow. */


if (geneCount < 1)
    errAbort("ERROR: unique gene count less than one ?");

time2 = clock1000();
verbose(2, "records read time: %.2f seconds\n", (time2 - time1) / 1000.0);

/* Get an array for sorting. */
AllocArray(geneArray, geneCount);
for (gene = geneList,geneIx=0; gene != NULL; gene = gene->next, ++geneIx)
    geneArray[geneIx] = gene;

/* Print out closest 1000 in tab file.  For each gene the distances are
 * recomputed, the array is re-sorted by distance, and at most the first
 * 1000 entries are written. */
for (curGene = geneList; curGene != NULL; curGene = curGene->next)
    {
    calcDistances(curGene, geneList, weights);
    qsort(geneArray, geneCount, sizeof(geneArray[0]), cmpMicroDataDistance);
    for (geneIx=0; geneIx < 1000 && geneIx < geneCount; ++geneIx)
        {
        gene = geneArray[geneIx];
        fprintf(f, "%s\t%s\t%f\n", curGene->name, gene->name, gene->distance);
        }
    dotOut();
    }

printf("Made %s.tab\n", outTable);

time1 = time2;
time2 = clock1000();
verbose(2, "distance computation time: %.2f seconds\n", (time2 - time1) / 1000.0);

/* Create and load table. */
conn = sqlConnect(database);
distanceTableCreate(conn, outTable);
hgLoadTabFile(conn, tempDir, outTable, &f);
printf("Loaded %s\n", outTable);

/* Add indices. */
sqlSafef(query, sizeof(query), "alter table %s add index(query(12))", outTable);
sqlUpdate(conn, query);
printf("Made query index\n");
if (optionExists("targetIndex"))
    {
    sqlSafef(query, sizeof(query), "alter table %s add index(target(12))", outTable);
    sqlUpdate(conn, query);
    printf("Made target index\n");
    }

hgRemoveTabFile(tempDir, outTable);

time1 = time2;
time2 = clock1000();
verbose(2, "table create/load/index time: %.2f seconds\n", (time2 - time1) / 1000.0);

}
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable)
/* hgExpDistance - Create table that measures expression distance between pairs.
 *   database - database to read posTable from and to create outTable in.
 *   posTable - table supplying the name, expCount and expScores columns.
 *   expTable - not referenced by this function.
 *   outTable - name of the distance table and of the intermediate tab file.
 * Aborts if a row's expCount disagrees with its score list or with other rows,
 * if no genes remain after the optional lookup, or if a worker thread cannot
 * be created or joined. */
{
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char query[256];
char **row;
struct hash *expHash = hashNew(16);
int realExpCount = -1;
struct microData *gene;
int rc, t;
pthread_t *threads = NULL;
pthread_attr_t attr;
int *threadID = NULL;
void *status;
char *tempDir = ".";
long time1, time2;

time1 = clock1000();

/* Get list/hash of all items with expression values. */
sqlSafef(query, sizeof(query), "select name,expCount,expScores from %s", posTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *name = row[0];
    /* Only the first row seen for a given name is kept. */
    if (!hashLookup(expHash, name))
        {
        int expCount = sqlUnsigned(row[1]);
        int commaCount;
        float *expScores = NULL;

        sqlFloatDynamicArray(row[2], &expScores, &commaCount);
        if (expCount != commaCount)
            errAbort("expCount and expScores don't match on %s in %s", name, posTable);
        /* The first row fixes the experiment count all others must match. */
        if (realExpCount == -1)
            realExpCount = expCount;
        if (expCount != realExpCount)
            errAbort("In %s some rows have %d experiments others %d", 
                name, expCount, realExpCount);
        AllocVar(gene);
        gene->expCount = expCount;
        gene->expScores = expScores;
        hashAddSaveName(expHash, name, gene, &gene->name);
        slAddHead(&geneList, gene);
        }
    }
sqlFreeResult(&sr);
/* conn is still open here and is reused for the lookup below; opening a
 * second connection at this point would leak the first one. */
slReverse(&geneList);
geneCount = slCount(geneList);
printf("Have %d elements in %s\n", geneCount, posTable);

weights = getWeights(realExpCount);

if (optionExists("lookup"))
    geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList);
geneCount = slCount(geneList);
printf("Got %d unique elements in %s\n", geneCount, posTable);

sqlDisconnect(&conn);	/* Disconnect because next step is slow. */


if (geneCount < 1)
    errAbort("ERROR: unique gene count less than one ?");

time2 = clock1000();
verbose(2, "records read time: %.2f seconds\n", (time2 - time1) / 1000.0);

f = hgCreateTabFile(tempDir, outTable);

/* instantiate threads */
AllocArray( threadID, numThreads );
AllocArray( threads, numThreads );
pthread_attr_init( &attr );
pthread_mutex_init( &mutexfilehandle, NULL );
pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );

for (t = 0; t < numThreads; t++) {
    /* Each worker gets a pointer to its own slot so the id stays valid
     * for the lifetime of the thread. */
    threadID[t] = t;
    rc = pthread_create( &threads[t], &attr, computeDistance, 
                         (void *) &threadID[t]);
    if (rc)
        errAbort("ERROR: in pthread_create() %d\n", rc );
} 

/* synchronize all threads */
for (t = 0; t < numThreads; t++) {
    rc = pthread_join( threads[t], &status);
    if (rc)
        errAbort("ERROR: in pthread_join() %d\n", rc );
} 

printf("Made %s.tab\n", outTable);

slFreeList( &geneList );

pthread_mutex_destroy( &mutexfilehandle );
pthread_attr_destroy( &attr );

time1 = time2;
time2 = clock1000();
verbose(2, "distance computation time: %.2f seconds\n", (time2 - time1) / 1000.0);

/* Create and load table. */
conn = sqlConnect(database);
distanceTableCreate(conn, outTable);
hgLoadTabFile(conn, tempDir, outTable, &f);
printf("Loaded %s\n", outTable);

/* Add indices. */
sqlSafef(query, sizeof(query), "alter table %s add index(query(12))", outTable);
sqlUpdate(conn, query);
printf("Made query index\n");
if (optionExists("targetIndex"))
    {
    sqlSafef(query, sizeof(query), "alter table %s add index(target(12))", outTable);
    sqlUpdate(conn, query);
    printf("Made target index\n");
    }

/* Release the connection opened for the load/index step. */
sqlDisconnect(&conn);

hgRemoveTabFile(tempDir, outTable);

time1 = time2;
time2 = clock1000();
verbose(2, "table create/load/index time: %.2f seconds\n", (time2 - time1) / 1000.0);

}
Exemple #26
0
void consForBed() 
/* Write conservation output for the beds of one chromosome.
   The bed file, conservation file, chromosome and output file names all
   come from option values; the summary output file is optional. */
{
struct bed *allBeds = NULL, *cur = NULL;
int *scores = NULL;
FILE *consOut = NULL;
FILE *summaryOut = NULL;
char *consOutName = NULL;
char *summaryName = NULL;
char *chromName = NULL;
char *bedName = NULL;
char *scoreFileName = NULL;

/* The per-bed conservation output file is required. */
consOutName = optionVal("bedConsOut", NULL);
if (consOutName == NULL)
    errAbort("Must specify an output file for bed conservation.");

/* The summary file may be left unset. */
summaryName = optionVal("summary", NULL);

/* Only beds on this chromosome are processed. */
chromName = optionVal("chrom", NULL);
if (chromName == NULL)
    errAbort("Must specify a chromosome.");

/* Load every bed record from the bed file. */
warn("Reading in beds.");
bedName = optionVal("bedFile", NULL);
if (bedName == NULL)
    errAbort("Must specify a bedFile.\n");
allBeds = bedLoadAll(bedName);

/* Load the conservation values. */
scoreFileName = optionVal("consFile", NULL);
if (scoreFileName == NULL)
    errAbort("Must specify a conservation file.");
scores = readInConservationVals(scoreFileName);

/* Outputs are opened only after both inputs have been read. */
consOut = mustOpen(consOutName, "w");
if (summaryName != NULL)
    summaryOut = mustOpen(summaryName, "w");

/* Emit conservation for each bed whose chromosome matches. */
warn("Writing out conservation for beds.");
for (cur = allBeds; cur != NULL; cur = cur->next)
    {
    if (!differentString(chromName, cur->chrom))
        outputBedConservation(cur, scores, consOut, summaryOut);
    }

warn("Cleaning up");
carefulClose(&consOut);
carefulClose(&summaryOut);
freez(&scores);
warn("Done.");
}
Exemple #27
0
void xmfaToMaf(char *in, char *out)
/* xmfaToMaf - Convert from xmfa to maf format.
 *   in  - path of the xmfa file to read.
 *   out - path of the maf file to write.
 * Every sequence read is turned into one maf component. When the character
 * following a sequence is '=', the accumulated components are written out as
 * one alignment and a fresh alignment is started.
 * Aborts if a sequence header cannot be parsed or if the chromosome size
 * cannot be found in the chromInfo table. */
{
int c;
FILE *input  = mustOpen(in,  "r");
FILE *output = mustOpen(out, "w");

char* commentLine;
struct dnaSeq* sequence;

struct mafAli *ali;

struct sqlConnection* conn = hAllocConn();

mafWriteStart(output, "mlagan");

AllocVar(ali);
while(myFaReadMixedNext(input, TRUE, "default name", TRUE, &commentLine, &sequence)) {
    c = fgetc(input);
    if(c == '=' || c == '>') { /* add the current sequence and process the block if we've see an '='*/
        char srcName[128];
        char org[32];
        char chrom[32];
        int start;
        int stop;
        char strand;
        struct mafComp *comp;
        double score = 0.0;
        char query[1024];
        char sizeBuf[64];

        ungetc(c, input);
        
        AllocVar(comp);
        /* Parse the comment line, expected as ">org chrom:start-stop strand".
         * Field widths keep org and chrom inside their 32-byte buffers. */
        if (sscanf(commentLine, ">%31s %31[^:]:%d-%d %c", org, chrom, &start, &stop, &strand) != 5)
            errAbort("Can't parse sequence header '%s' in %s", commentLine, in);

        /* Build the name; an option named after the organism overrides the
         * organism name, which is otherwise used as is. */
        safef(srcName, sizeof(srcName), "%s.%s", optionVal(org, org), chrom);
        comp->src = cloneString(srcName);

        /* Look up the chromosome size. The query is run unconditionally
         * rather than inside assert(), so it is not compiled out by NDEBUG. */
        sqlSafef(query, sizeof(query), "SELECT size FROM %s.chromInfo WHERE chrom = \"%s\"", optionVal(org, org), chrom);
        if (sqlQuickQuery(conn, query, sizeBuf, sizeof(sizeBuf)) == NULL)
            errAbort("Can't find size of %s in %s.chromInfo", chrom, optionVal(org, org));
        comp->srcSize = atoi(sizeBuf);

        comp->strand = strand;

        /* NOTE(review): presumably converts a 1-based start to 0-based - confirm. */
        start = start - 1;

        comp->start = start;
        comp->size = ungappedSize(sequence);

        /* On the minus strand the start is measured from the other end. */
        if(strand == '-')
            comp->start = comp->srcSize - (comp->start + comp->size);
        
        /* Hand the sequence text over to the component before freeing. */
        comp->text = sequence->dna;
        sequence->dna = 0;
        slAddHead(&ali->components, comp);
        freeDnaSeq(&sequence);

        if(c == '=') {
            /* A separator without a readable score leaves the score at 0. */
            if (fscanf(input, "= score=%lf\n", &score) != 1)
                score = 0.0;

            ali->score = score;

            slReverse(&ali->components);
            mafWrite(output, ali);
            mafAliFree(&ali);

            AllocVar(ali);
        }
    }
    else {
        /* The sequence is not used in this case; release it. */
        freeDnaSeq(&sequence);
    }
}

mafWriteEnd(output);

/* Release the alignment that was never written, the files and the connection. */
mafAliFree(&ali);
carefulClose(&input);
carefulClose(&output);
hFreeConn(&conn);
}