Beispiel #1
0
static void genePredToMafFrames(char *targetDb, char *mafFile, char *mafFramesFile,
                                int numGeneDbs, char **geneDbs, char **genePreds,
                                char *bedFile)
/* Create mafFrames tables from genePreds.
 *   targetDb, mafFile              - handed to mkMafFramesForMaf()
 *   numGeneDbs, geneDbs, genePreds - handed to loadGenePreds()
 *   mafFramesFile, bedFile         - handed to outputFrames() */
{
/* get list of organisms and their genes */
struct orgGenes *orgs = loadGenePreds(numGeneDbs, geneDbs, genePreds);
struct orgGenes *genes;

mkMafFramesForMaf(targetDb, orgs, mafFile);

if (verboseLevel() >= 4)
    dumpGeneInfo("after load", orgs);
for (genes = orgs; genes != NULL; genes = genes->next)
    splitMultiMappings(genes);
if (verboseLevel() >= 5)
    dumpGeneInfo("after split", orgs);
for (genes = orgs; genes != NULL; genes = genes->next)
    finishMafFrames(genes);
if (verboseLevel() >= 5)
    dumpGeneInfo("after finish", orgs);
outputFrames(orgs, mafFramesFile, bedFile);

/* The loop cursor is NULL once the loops above have finished, so freeing
 * through it released nothing.  Walk the list from its head instead and
 * pick up the next link before each element goes away.
 * NOTE(review): assumes orgGenesFree() releases a single element rather
 * than a whole list -- confirm against its definition. */
while ((genes = orgs) != NULL)
    {
    orgs = genes->next;
    orgGenesFree(&genes);
    }
}
void addVersion(boolean strict, char *database, char *dirName, char *raName,
                struct hash *uniqHash,
                struct hash *htmlHash,
                struct hgFindSpec **pSpecList)
/* Load the search specs from raName, pass them through pruneRelease(), and
 * push every spec whose searchName is not yet in uniqHash onto *pSpecList,
 * recording it in uniqHash.  When strict is set, specs whose searchTable or
 * xrefTable(s) do not exist in database are removed from the list first.
 * dirName and htmlHash are not referenced by this function. */
{
    struct hgFindSpec *specs = hgFindSpecFromRa(database, raName, NULL);
    struct hgFindSpec *cur = NULL;
    struct hgFindSpec *following = NULL;

    /* prune records of the incorrect release */
    specs = pruneRelease(specs);

    if (strict)
    {
        cur = specs;
        while (cur != NULL)
        {
            boolean drop = FALSE;

            /* remember the successor now: cur may be unlinked below */
            following = cur->next;
            if (!hTableOrSplitExists(database, cur->searchTable))
            {
                if (verboseLevel() > 1)
                    printf("%s missing\n", cur->searchTable);
                drop = TRUE;
            }
            else if (cur->xrefTable[0] != 0)
            {
                /* xrefTable may be $db.$table, which hTableExists and
                 * hTableOrSplitExists do not support, and it may hold a
                 * comma-separated list of tables; hence sqlTablesExist on
                 * a space-separated copy. */
                struct sqlConnection *conn = hAllocConn(database);
                char *tables = replaceChars(cur->xrefTable, ",", " ");
                boolean exists = sqlTablesExist(conn, tables);

                hFreeConn(&conn);
                freeMem(tables);
                if (!exists)
                {
                    if (verboseLevel() > 1)
                        printf("%s (xref) missing\n", cur->xrefTable);
                    drop = TRUE;
                }
            }
            if (drop)
                slRemoveEl(&specs, cur);
            cur = following;
        }
    }

    cur = specs;
    while (cur != NULL)
    {
        /* slAddHead rewrites cur->next, so fetch the successor first */
        following = cur->next;
        if (hashLookup(uniqHash, cur->searchName) == NULL)
        {
            hashAdd(uniqHash, cur->searchName, cur);
            slAddHead(pSpecList, cur);
        }
        cur = following;
    }
}
Beispiel #3
0
void hgLoadOutJoined(char *database, int rmskCount, char *rmskFileNames[], char *suffix)
/* Read each of the rmskCount RepeatMasker .out files through readOneOut(),
 * close the output files, and - when no tabFileName was given - load the
 * result into database via loadOneTable().  Finishes with a warning if any
 * records were counted in badRepCnt. */
{
struct sqlConnection *conn = NULL;
int fileIx = 0;

/* a database connection is only needed when the table will be loaded */
if (tabFileName == NULL)
    {
    conn = hAllocConn(database);
    verbose(2,"#\thgLoadOutJoined: connected to database: %s\n", database);
    }

while (fileIx < rmskCount)
    {
    readOneOut(rmskFileNames[fileIx]);
    ++fileIx;
    }
closeFiles();

if (tabFileName == NULL)
    loadOneTable(database, conn, defaultTempName, suffix);
hFreeConn(&conn);

if (badRepCnt <= 0)
    return;
warn("note: %d records dropped due to repEnd < 0 or repStart > repEnd\n", badRepCnt);
if (verboseLevel() < 2)
    warn("      run with -verbose=2 for details\n");
}
Beispiel #4
0
int main(int argc, char* argv[])
/* Parse the command line (three positional arguments plus the numMrnas,
 * numEsts, accList, selectAcc and verbose options) and run getTestSubset. */
{
verboseSetLevel(0);
optionInit(&argc, argv, optionSpecs);
if (argc != 4)
    usage();

/* positional arguments */
char *relName = argv[1];
char *database = argv[2];
char *outDir = argv[3];

/* options, each with its default */
int numMrnas = optionInt("numMrnas", 10);
int numEsts = optionInt("numEsts", 10);
char *accList = optionVal("accList",  NULL);
char *selectAccFile = optionVal("selectAcc",  NULL);

gbVerbInit(optionInt("verbose", 0));
if (verboseLevel() > 0)
    {
    /* line-buffer stderr once any verbosity is requested */
    setlinebuf(stderr);
    }

getTestSubset(numMrnas, numEsts, accList, selectAccFile,
              relName, database, outDir);
return 0;
}
Beispiel #5
0
void AgentF::printSettings()
{
  QString txt = "  %1 = %2";
  int width = -20; // Negative value = left-aligned

  print(tr("Infos:"));
  print(txt.arg("Postgres version", width).arg(mFilu->serverVersion()));
  print(txt.arg("Qt version", width).arg(qVersion()));
  print(txt.arg("muParser version", width).arg(mu::Parser().GetVersion().data()));
  print(txt.arg("Settings file", width).arg(mRcFile->fileName()));
  print(txt.arg("Fallback file", width).arg(mRcFile->fallbackFile()));
  print(txt.arg("FiluHome", width).arg(mRcFile->getPath("FiluHome")));
  print(txt.arg("LogFile", width).arg(mRcFile->getUrl("LogFile")));
  print("");

  mFilu->printConfigKeys();

  print(tr("AgentF Config Keys:"));
  print(txt.arg("MaxClones", width).arg(mRcFile->getIT("MaxClones")));
  print(txt.arg("MinJobsPerClone", width).arg(mRcFile->getIT("MinJobsPerClone")));
  print(txt.arg("MaxTimeLag", width).arg(mRcFile->getIT("MaxTimeLag")));
  print(txt.arg("MinTimeLag", width).arg(mRcFile->getIT("MinTimeLag")));
  print(txt.arg("ProviderPath", width).arg(mRcFile->getPath("ProviderPath")));
  print(txt.arg("Verbose", width).arg(verboseLevel()));
  print("");
}
Beispiel #6
0
static void ccdsMkTables(char *ccdsDb, char *hgDb, int ccdsBuildId, char *ccdsInfoOut, char *ccdsNotesOut, char *ccdsGeneOut)
/* Build the ccdsInfo, ccdsNotes and ccdsGene files from the imported CCDS
 * database, cross-check the info and gene id sets with gotCcdsValidate(),
 * and load the tables into hgDb when loadDb is set.  SQL tracing is enabled
 * at verbose level 2 and above. */
{
char infoFile[PATH_LEN], infoTbl[PATH_LEN];
char notesFile[PATH_LEN], notesTbl[PATH_LEN];
char geneFile[PATH_LEN], geneTbl[PATH_LEN];
struct sqlConnection *conn;
struct genomeInfo *genome;
struct hash *infoIds, *geneIds, *ignored;

if (verboseLevel() >= 2)
    sqlMonitorEnable(JKSQL_TRACE);

conn = ccdsSqlConn(ccdsDb);
genome = getGenomeInfo(hgDb, ccdsBuildId);
infoIds = hashNew(20);
geneIds = hashNew(20);
ignored = buildIgnoreTbl(conn, genome);

/* ccdsInfo: fills infoIds, which the notes step below consumes */
ccdsGetTblFileNames(ccdsInfoOut, infoTbl, infoFile);
createCcdsInfo(conn, infoFile, genome, ignored, infoIds);

/* ccdsNotes */
ccdsGetTblFileNames(ccdsNotesOut, notesTbl, notesFile);
createCcdsNotes(conn, notesFile, genome, infoIds);

/* ccdsGene: fills geneIds */
ccdsGetTblFileNames(ccdsGeneOut, geneTbl, geneFile);
createCcdsGene(conn, geneFile, genome, ignored, geneIds);

sqlDisconnect(&conn);
sqlMonitorDisable();

gotCcdsValidate(infoIds, geneIds);

if (loadDb)
    {
    loadTables(hgDb, infoTbl, infoFile, geneTbl, geneFile, notesTbl, notesFile);
    }
}
Beispiel #7
0
int main(int argc, char *argv[])
/* Process command line.  Requires -age=<hours> greater than zero, computes
 * dropTime as the current time minus that age, records the remaining
 * options in the file-level settings, and runs dbTrash() on the chosen db. */
{
struct tm *tm;

if (argc < 2)
    usage();
optionInit(&argc, argv, options);

if (!optionExists("age"))
    {
    verbose(1,"ERROR: must specify an age argument, e.g.: -age=8\n");
    usage();
    }
if (time(&timeNow) < 1)
    errAbort("can not obtain current time via time() function\n");
/* localtime() may return NULL; the fields are dereferenced below even when
 * the verbose level suppresses the output, so check up front. */
tm = localtime(&timeNow);
if (tm == NULL)
    errAbort("can not convert current time via localtime() function\n");
ageHours = optionFloat("age", 0.0);
time_t ageSeconds = (time_t)(ageHours * 3600);	/*	age in seconds	*/
dropTime = timeNow - ageSeconds;
if (ageHours > 0.0)
    {
    /* the %ld conversions take (long) arguments so that format and
     * argument types agree */
    verbose(2,"#\tspecified age = %f hours = %ld seconds\n", ageHours,
	(long)ageSeconds);
    verbose(2,"#\tcurrent time: %d-%02d-%02d %02d:%02d:%02d %ld\n",
	1900+tm->tm_year, 1+tm->tm_mon, tm->tm_mday,
        tm->tm_hour, tm->tm_min, tm->tm_sec, (long)timeNow);
    /* localtime() reuses its result buffer, so the current time had to be
     * printed before this second call */
    tm = localtime(&dropTime);
    if (tm == NULL)
        errAbort("can not convert drop time via localtime() function\n");
    verbose(2,"#\t   drop time: %d-%02d-%02d %02d:%02d:%02d %ld\n",
	1900+tm->tm_year, 1+tm->tm_mon, tm->tm_mday,
        tm->tm_hour, tm->tm_min, tm->tm_sec, (long)dropTime);
    }
else
    {
    /* %f rather than %.f so a fractional value is shown as entered
     * instead of being rounded to a whole number */
    verbose(1,"ERROR: specified age %f must be greater than 0.0\n", ageHours);
    usage();
    }
drop = optionExists("drop");
historyToo = optionExists("historyToo");
db = optionVal("db",db);
extFileCheck = optionExists("extFile");
extDel = optionExists("extDel");
tableStatus = optionExists("tableStatus");
topDir = optionVal("topDir", topDir);
verbose(2,"#\tdrop requested: %s\n", drop ? "TRUE" : "FALSE");
verbose(2,"#\t    historyToo: %s\n", historyToo ? "TRUE" : "FALSE");
verbose(2,"#\t       extFile: %s\n", extFileCheck ? "TRUE" : "FALSE");
verbose(2,"#\t        extDel: %s\n", extDel ? "TRUE" : "FALSE");
verbose(2,"#\t   tableStatus: %s\n", tableStatus ? "TRUE" : "FALSE");
verbose(2,"#\t        topDir: %s\n", topDir);
verbose(2,"#\tdatabase: %s\n", db);

dbTrash(db);
if (verboseLevel() > 1)
    printVmPeak();
return 0;
}
Beispiel #8
0
bool CmdHelper::hasOpt(const QString& opt)
{
  // Stores the lookup result in mInqOpt and reports whether opt was found.
  mInqOpt = mOptions.named(opt);

  const bool found = mInqOpt.idxPos > -1;
  if(found)
  {
    return true;
  }

  // Only when verboseLevel(eAmple) holds: asking for an option that was
  // never registered is reported through fatal().
  if(verboseLevel(eAmple) && !mOpts.contains(opt))
  {
    fatal(FUNC, QString("Test for unregistered option: %1").arg(opt));
  }

  return false;
}
Beispiel #9
0
struct slName *getTableNames(struct sqlConnection *conn)
/* Return the names of the tables on conn, in query order, that survive the
 * command line filters: tables older than hoursOld hours (when hoursOld is
 * set) and tables matching any of excludePatterns are left out.
 * When justList is set, each kept name is printed to stderr and the program
 * exits with status 0 instead of returning. */
{
char *query = hoursOld ? "NOSQLINJ show table status" : "NOSQLINJ show tables";
struct sqlResult *sr = sqlGetResult(conn, query);
struct slName *kept = NULL;
char **row = NULL;
int startTime = clock1();
int ageThresh = hoursOld * 3600;

for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    struct slName *pat = excludePatterns;
    struct slName *entry = NULL;

    if (hoursOld)
        {
        /* NOTE(review): row[11] is taken to be the update time of
         * "show table status"; the column order differs between MySQL
         * versions -- confirm against the server in use. */
        if (row[11] == NULL)
            {
            verbose(2,
                    "Got NULL update time for table %s.%s with hoursOld=%d\n",
                    sqlGetDatabase(conn), row[0], hoursOld);
            }
        else
            {
            int tableUpdateTime = sqlDateToUnixTime(row[11]);
            int ageInSeconds = startTime - tableUpdateTime;
            if (ageInSeconds > ageThresh)
                continue;
            }
        }

    /* stop at the first exclude pattern that matches this table name */
    while (pat != NULL && !wildMatch(pat->name, row[0]))
        pat = pat->next;
    if (pat != NULL)
        continue;

    if (verboseLevel() >= 3 || justList)
        fprintf(stderr, "Adding %s\n", row[0]);
    entry = newSlName(row[0]);
    slAddHead(&kept, entry);
    }
sqlFreeResult(&sr);
if (justList)
    exit(0);
/* names were pushed on the head, so flip back to query order */
slReverse(&kept);
return kept;
}
Beispiel #10
0
static struct hash* buildIgnoreTbl(struct sqlConnection *conn, struct genomeInfo *genome)
/* Return a new hash of CCDS ids to ignore, filled in by findPartialMatches()
 * (ids with the interpretation_subtype of "Partial match") and then by
 * findReplaced().  Doing this inside the main query was very, very slow on
 * MySQL 4.0, hence the separate table.  The table is dumped at verbose
 * level 3 and above. */
{
struct hash *ignored = hashNew(20);

findPartialMatches(conn, genome, ignored);
findReplaced(conn, genome, ignored);

if (verboseLevel() < 3)
    return ignored;
dumpIgnoreTbl(ignored);
return ignored;
}
Beispiel #11
0
int main(int argc, char *argv[])
/* Parse options, set the udc default directory, and run bigWigInfo on the
 * single positional argument. */
{
char *udcDir;

optionInit(&argc, argv, options);

/* -udcDir overrides the built-in default directory */
udcDir = optionVal("udcDir", udcDefaultDir());
udcSetDefaultDir(udcDir);

if (argc != 2)
    {
    usage();
    }
bigWigInfo(argv[1]);

if (verboseLevel() > 1)
    {
    printVmPeak();
    }
return 0;
}
Beispiel #12
0
bool CmdHelper::has(const QString& opt)
{
  // Existence test only: mInqOpt is deliberately left unchanged.
  if(mOptions.has(opt))
  {
    return true;
  }

  // Only when verboseLevel(eAmple) holds: a name that is neither a
  // registered option nor a registered subcommand is reported via fatal().
  if(verboseLevel(eAmple))
  {
    const bool registered = mOpts.contains(opt) || mSubCmds.contains(opt);
    if(!registered)
    {
      fatal(FUNC, QString("Test for unregistered option or subcommand: %1").arg(opt));
    }
  }

  return false;
}
Beispiel #13
0
static void verboseLink(struct hapRegions *hr, struct cDnaAlign *refAln, struct cDnaAlign *hapAln)
/* At verbose level 5 and above, report that refAln is being linked to
 * hapAln, printing the location of each.  hr is not referenced here. */
{
    /* nothing to report below level 5 */
    if (verboseLevel() < 5)
        return;

    verbose(5, "link refAln ");
    cDnaAlignVerbLoc(5, refAln);
    verbose(5, " to hapAln ");
    cDnaAlignVerbLoc(5, hapAln);
    verbose(5, "\n");
}
Beispiel #14
0
struct blastQuery *blastFileNextQuery(struct blastFile *bf)
/* Read all alignments associated with next query.  Return NULL at EOF.
 * The returned query carries its gapped alignments in file order. */
{
char *line;
struct blastQuery *bq;
struct blastGappedAli *bga;

verbose(TRACE_LEVEL, "blastFileNextQuery\n");

/* find and parse Query= */
line = bfSearchForLine(bf, "Query=");
if (line == NULL)
    return NULL;

/* Allocate only once a query is known to exist; allocating before the EOF
 * check leaked the record on the final call. */
AllocVar(bq);
parseQueryLines(bf, line, bq);

/* find and parse Database: */
line = bfSearchForLine(bf, "Database:");
if (line == NULL)
    bfUnexpectedEof(bf);
parseDatabaseLines(bf, line, bq);

/* Seek to beginning of first gapped alignment. */
for (;;)
    {
    line = bfNeedNextLine(bf);
    if (line[0] == '>')
	{
	/* push the '>' line back so the alignment reader sees it */
	lineFileReuse(bf->lf);
	break;
	}
    else if (isRoundLine(line))
        parseRoundLine(line, bq);
    else if (stringIn("No hits found", line) != NULL)
        break;
    }

/* Read in gapped alignments. */
while ((bga = blastFileNextGapped(bf, bq)) != NULL)
    {
    slAddHead(&bq->gapped, bga);
    }
/* alignments were pushed on the head; restore file order */
slReverse(&bq->gapped);
if (verboseLevel() >= DUMP_LEVEL)
    {
    verbose(DUMP_LEVEL, "blastFileNextQuery result:\n");
    blastQueryPrint(bq, stderr);
    }
return bq;
}
Beispiel #15
0
int main(int argc, char *argv[])
/* Parse options, pick up -subDirs, and run agpToFa on the four positional
 * arguments. */
{
char *subDirsDefault;

optionInit(&argc, argv, optionSpecs);

/* a copy of the current value serves as the default for -subDirs */
subDirsDefault = cloneString(subDirs);
subDirs = optionVal("subDirs", subDirsDefault);
if (verboseLevel() > 1)
    verbose(2,"#\tsubDirs: %s\n", subDirs);

if (argc != 5)
    {
    usage();
    }
agpToFa(argv[1], argv[2], argv[3], argv[4]);
return 0;
}
Beispiel #16
0
bool CmdHelper::hasSubCmd(const QString& cmd)
{
  // Stores the lookup result in mInqOpt; on a hit it is also copied to
  // mInqSubCmd.
  mInqOpt = mOptions.named(cmd);

  const bool found = mInqOpt.idxPos > -1;
  if(found)
  {
    mInqSubCmd = mInqOpt;
    return true;
  }

  // Only when verboseLevel(eAmple) holds: asking for a subcommand that was
  // never registered is reported through fatal().
  if(verboseLevel(eAmple) && !mSubCmds.contains(cmd))
  {
    fatal(FUNC, QString("Test for unregistered subcommand: %1").arg(cmd));
  }

  return false;
}
Beispiel #17
0
boolean checkRepeat(struct rmskOut2 *r, struct lineFile *lf)
/* Return TRUE when the repeat range of r is usable.  A range whose start is
 * past its end is counted in badRepCnt, reported at verbose level 2 with
 * the current line of lf, and rejected with FALSE. */
{
if (r->repStart <= r->repEnd)
    return TRUE;

/* start past end is bogus on both strands */
++badRepCnt;
if (verboseLevel() > 1)
    verbose(2, "bad rep range [%d, %d] line %d of %s %s:%d-%d\n",
            r->repStart, r->repEnd, lf->lineIx, lf->fileName,
            r->genoName, r->genoStart, r->genoEnd);
return FALSE;
}
int main(int argc, char *argv[])
/* Parse options (maxGigs, blockSize, itemsPerSlot, unc) and run
 * bedGraphToBigWig on the three positional arguments. */
{
char *first, *second, *third;

optionInit(&argc, argv, options);

/* the allocation ceiling is applied before any other option is read */
maxGigs = optionInt("maxGigs", maxGigs);
setMaxAlloc(maxGigs*1000000000L);

blockSize = optionInt("blockSize", blockSize);
itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot);
doCompress = !optionExists("unc");

if (argc != 4)
    {
    usage();
    }
first = argv[1];
second = argv[2];
third = argv[3];
bedGraphToBigWig(first, second, third);

if (verboseLevel() > 1)
    {
    printVmPeak();
    }
return 0;
}
Beispiel #19
0
int main(int argc, char *argv[])
/* Parse options (chrom, start, end, maxItems, udcDir) and run bigBedToBed
 * on the two positional arguments. */
{
char *udcDir;

optionInit(&argc, argv, options);

/* optional restrictions; each falls back to its current value */
clChrom = optionVal("chrom", clChrom);
clStart = optionInt("start", clStart);
clEnd = optionInt("end", clEnd);
maxItems = optionInt("maxItems", maxItems);

udcDir = optionVal("udcDir", udcDefaultDir());
udcSetDefaultDir(udcDir);

if (argc != 3)
    {
    usage();
    }
bigBedToBed(argv[1], argv[2]);

if (verboseLevel() > 1)
    {
    printVmPeak();
    }
return 0;
}
Beispiel #20
0
int main(int argc, char *argv[])
/* Parse options (blockSize, itemsPerSlot, clip, unc, keepAllChromosomes,
 * fixedSummaries) and run wigToBigWig on the three positional arguments. */
{
char *first, *second, *third;

optionInit(&argc, argv, options);

/* numeric options fall back to their current values */
blockSize = optionInt("blockSize", blockSize);
itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot);

/* boolean switches */
clipDontDie = optionExists("clip");
doCompress = !optionExists("unc");
keepAllChromosomes = optionExists("keepAllChromosomes");
fixedSummaries = optionExists("fixedSummaries");

if (argc != 4)
    {
    usage();
    }
first = argv[1];
second = argv[2];
third = argv[3];
wigToBigWig(first, second, third);

if (verboseLevel() > 1)
    {
    printVmPeak();
    }
return 0;
}
Beispiel #21
0
static struct hash *readLift(char *liftAcross)
/* Read in the liftAcross file (six columns per row) and return a hash keyed
 * by srcName.  Each hash value is a list of liftSpec coordinate
 * relationships for that source, sorted by start position.  At verbose
 * level 3 and above every entry is echoed. */
{
char *row[6];
struct hash *result = newHash(8);
struct hashEl *hel = NULL;
struct lineFile *lf = lineFileOpen(liftAcross, TRUE);
while (lineFileNextRow(lf, row, ArraySize(row)))
    {
    struct liftSpec *liftSpec;
    hel = hashStore(result, row[0]);		/* srcName hash	*/
    AllocVar(liftSpec);
    liftSpec->start = sqlUnsigned(row[1]);	/* src start	*/
    liftSpec->end = sqlUnsigned(row[2]);	/* src end	*/
    liftSpec->dstName = cloneString(row[3]);	/* dstName	*/
    liftSpec->dstStart = sqlUnsigned(row[4]);	/* dst start	*/
    liftSpec->strand = '+';			/* dst strand	*/
    if ('-' == *row[5])
	liftSpec->strand = '-';
    /* accumulate list of lift specs under the srcName hash	*/
    slAddHead(&(hel->val), liftSpec);
    }
/* Everything needed has been copied out of the rows, so the input file can
 * be released here; it was previously left open. */
lineFileClose(&lf);

/*	Go through each srcName in the hash, and sort the list there by
 *	the start coordinate of each item.  The searching will expect
 *	them to be in order.
 */
struct hashCookie cookie = hashFirst(result);
while ((hel = hashNext(&cookie)) != NULL)
    {
    slSort(&(hel->val), lsStartCmp);
    if (verboseLevel() > 2)
	{
	struct liftSpec *ls;
	for (ls = hel->val; ls != NULL; ls = ls->next)
	    verbose(3, "# %s\t%d\t%d\t%s\t%d\t%c\n", hel->name, ls->start,
		ls->end, ls->dstName, ls->dstStart, ls->strand);
	}
    }

return result;
}
Beispiel #22
0
int main(int argc, char *argv[])
/* Process command line. */
{
    optionInit(&argc, argv, options);
    udcSetDefaultDir(optionVal("udcDir", udcDefaultDir()));
    if (optionExists("fields"))
    {
        if (argc < 2)
            usage();
        bigBedFields(argv[1]);
    }
    else
    {
        summaryType = optionVal("type", summaryType);
        if (argc != 6)
            usage();
        bigBedSummary(argv[1], argv[2], sqlUnsigned(argv[3]), sqlUnsigned(argv[4]), sqlUnsigned(argv[5]));
    }
    if (verboseLevel() > 1)
        printVmPeak();
    return 0;
}
Beispiel #23
0
void findEntries(int numAccs, unsigned type, struct gbRelease* release,
                 struct numRange* versions, struct numRange* modDates,
                 unsigned flags, unsigned orgCats,
                 struct hash* accTbl, int* accCount)
/* Find entries to copy, selected by number of versions and/or modDates;
 * pass NULL for a criterion that should not be used.  Updates of release
 * are scanned in list order until numAccs entries have been found; full
 * updates are considered only with FE_FULL in flags, the others only with
 * FE_DAILY.  The number found is added to *accCount. */
{
struct gbUpdate* update = release->updates;
int localAccCount = 0;

if (verboseLevel() > 1)
    {
    fprintf(stderr, "findEntries: num=%d", numAccs);
    if (flags & FE_FULL)
        fprintf(stderr, " full");
    if (flags & FE_DAILY)
        fprintf(stderr, " daily");
    fprintf(stderr, " %s", gbFmtSelect(type|orgCats));
    if (versions != NULL)
        fprintf(stderr, " numVers=%d-%d", versions->minNum, versions->maxNum);
    if (modDates != NULL)
        fprintf(stderr, " numModDates=%d-%d", modDates->minNum, modDates->maxNum);
    fprintf(stderr, "\n");
    }

/* scan by update to help minimize number of updates (by grouping) */
while ((update != NULL) && (localAccCount < numAccs))
    {
    /* the flag that must be set for this kind of update to be scanned */
    unsigned wanted = update->isFull ? FE_FULL : FE_DAILY;

    if (flags & wanted)
        {
        findInUpdate(numAccs, type, release, versions,  modDates,
                     flags, orgCats, update, accTbl, &localAccCount);
        }
    update = update->next;
    }

*accCount += localAccCount;
verbose(1, "  found: %d entries\n", localAccCount);
}
Beispiel #24
0
int main(int argc, char *argv[])
/* Parse options (count, logs, html), echo them at verbose level 2 and
 * above, and run convolve over the remaining arguments. */
{
optionInit(&argc, argv, optionSpecs);

/* at least one input file is required */
if (argc < 2)
    {
    usage();
    }

convolve_count = optionInt("count", 4);
logs = optionExists("logs");
html = optionExists("html");

if (verboseLevel() >= 2)
    {
    char *inputFormat = logs ? "log" : "probability";

    printf("options: -verbose, input file(s):\n");
    printf("-count=%d\n", convolve_count);
    printf("data input is in %s format\n", inputFormat);
    if (html)
        {
        printf ("output in html format\n");
        }
    }

convolve(argc, argv);

return(0);
}
Beispiel #25
0
static void layerOnHtml(char *dirName, struct trackDb *tdbList, char *database)
/* For every track in tdbList that has no html yet, look for
 * <dirName>/<name>.html, where name is the track's "html" setting or, when
 * that is absent, the track name, and read it in with readHtmlRecursive()
 * if the file exists. */
{
char path[512];
struct trackDb *track;

for (track = tdbList; track != NULL; track = track->next)
    {
    char *baseName;

    /* tracks that already carry html are left alone */
    if (!isEmpty(track->html))
        continue;

    baseName = trackDbSetting(track, "html");
    if (baseName == NULL)
        baseName = track->track;
    safef(path, sizeof(path), "%s/%s.html", dirName, baseName);
    if (!fileExists(path))
        continue;

    track->html = readHtmlRecursive(path, database);
    /* At higher verbosity, point out non-ASCII characters.  Normally these
     * are acceptable ISO-8859-1 characters. */
    if ((verboseLevel() >= 2) && hasNonAsciiChars(track->html))
        verbose(2, "Note: non-printing or non-ASCII characters in %s\n", path);
    }
}
Beispiel #26
0
/*	convolve() - perform the task on the input data
 *	I would like to rearrange this business here, and instead of
 *	reading in the data and leaving it in the hash for all other
 *	routines to work with, it would be best to get it immediately
 *	into an array.  That makes the work of the other routines much
 *	easier.
 */
static void convolve(int argc, char *argv[])
/* For each input file argv[1] .. argv[argc-1]: read its whitespace-separated
 * values (text after '#' is ignored) into a histogram hash with one bin per
 * value, then run convolve_count rounds of iteration() on it.  Histograms
 * are printed at verbose level 2 and above.  Values are taken as log base 2
 * when the logs option is set, otherwise as probabilities. */
{
int i;
struct lineFile *lf;			/* for line file utilities	*/

for (i = 1; i < argc; ++i)
    {
    int lineCount = 0;			/* counting input lines	*/
    char *line = (char *)NULL;		/* to receive data input line	*/
    char *words[128];			/* to split data input line	*/
    int wordCount = 0;			/* result of split	*/
    struct hash *histo0;	/*	first histogram	*/
    struct hash *histo1;	/*	second histogram	*/
    int medianBin0 = 0;		/*	bin holding the largest log value seen */
    double medianLog_2 = -500.0;	/*	largest log value seen	*/
    int bin = 0;		/*	0 to N-1 for N bins	*/
    int convolutions = 0;	/*	loop counter for # of convolutions */

    histo0 = newHash(0);

    lf = lineFileOpen(argv[i], TRUE);	/*	input file	*/
    /* report the file actually being read, not always the first one */
    verbose(1, "Processing %s\n", argv[i]);
    while (lineFileNext(lf, &line, NULL))
        {
        int j;			/*	loop counter over words	*/
        int inputValuesCount = 0;
        struct histoGram *hg;	/*	an allocated hash element	*/

        ++lineCount;
        chopPrefixAt(line, '#'); /* ignore any comments starting with # */
        if (strlen(line) < 3)	/*	anything left on this line ? */
            continue;		/*	no, go to next line	*/
        wordCount = chopByWhite(line, words, 128);
        if (wordCount < 1)
            warn("Expecting at least a word at line %d, file: %s, found %d words",
                lineCount, argv[i], wordCount);
        if (wordCount == 128)
            warn("May have more than 128 values at line %d, file: %s", lineCount, argv[i]);

        verbose(2, "Input data read from file: %s\n", argv[i]);
        for (j = 0; j < wordCount; ++j)
            {
            char binName[128];
            double dataValue;
            double probInput;
            double log_2;
            dataValue = strtod(words[j], NULL);
            ++inputValuesCount;
            if (logs)
                {
                log_2 = dataValue;
                probInput = pow(2.0,log_2);
                }
            else
                {
                if (dataValue > 0.0)
                    {
                    log_2 = log2(dataValue);
                    probInput = dataValue;
                    }
                else
                    {
                    log_2 = -500.0;	/*	arbitrary limit	*/
                    probInput = pow(2.0,log_2);
                    }
                }
            if (log_2 > medianLog_2)
                {
                medianLog_2 = log_2;
                medianBin0 = bin;
                }
            verbose(2, "bin %d: %g %0.5g\n",
                    inputValuesCount-1, probInput, log_2);

            AllocVar(hg);	/*	the histogram element	*/
            hg->bin = bin;
            hg->prob = probInput;
            hg->log_2 = log_2;
            snprintf(binName, sizeof(binName), "%d", hg->bin);
            hashAdd(histo0, binName, hg);

            ++bin;
            }	/*	for each word on an input line	*/
        }	/*	for each line in a file	*/
    /* the file is fully consumed; it was previously left open */
    lineFileClose(&lf);

    /*	file read complete, echo input	*/
    if (verboseLevel() >= 2)
        printHistogram(histo0, medianBin0);

    /*	perform convolutions to specified count
     *	the iteration does histo0 with itself to produce histo1
     *	Then histo0 is freed and histo1 copied to it for the
     *	next loop.
     */
    for (convolutions = 0; convolutions < convolve_count; ++convolutions)
        {
        int medianBin;
        histo1 = newHash(0);
        medianBin = iteration(histo0, histo1);
        if (verboseLevel() >= 2)
            printHistogram(histo1, medianBin);
        freeHashAndVals(&histo0);
        histo0 = histo1;
        }

    /* release the last histogram of this file before moving to the next */
    freeHashAndVals(&histo0);
    }		/*	for each input file	*/
}	/*	convolve()	*/
Beispiel #27
0
int main(int argc, char *argv[])
/* Process command line.  Reads the options into the file-level settings,
 * works out bedN (standard BED fields) and bedP (extra fields) either from
 * -type=bedN[+P] or by counting the fields of the first real input line,
 * obtains the autoSql text from -as or from bedAsDef(), and runs
 * bedToBigBed on the three positional arguments. */
{
optionInit(&argc, argv, options);
blockSize = optionInt("blockSize", blockSize);
itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot);
asFile = optionVal("as", asFile);
doCompress = !optionExists("unc");
sizesIs2Bit = optionExists("sizesIs2Bit");
extraIndex = optionVal("extraIndex", NULL);
tabSep = optionExists("tab");
udcDir = optionVal("udcDir", udcDefaultDir());
if (argc != 4)
    usage();
udcSetDefaultDir(udcDir);

if (optionExists("type"))
    {
    // parse type
    char *btype = cloneString(optionVal("type", ""));
    char *plus = strchr(btype, '+');
    if (plus)
	{
	/* cut the string at '+' so only the bedN part remains in btype */
	*plus++ = 0;
	/* <ctype.h> functions need a value representable as unsigned char;
	 * a plain char may be negative */
	if (isdigit((unsigned char)*plus))
	    bedP = sqlUnsigned(plus);
	}
    if (!startsWith("bed", btype))
	errAbort("type must begin with \"bed\"");
    btype +=3;
    bedN = sqlUnsigned(btype);
    if (bedN < 3)
	errAbort("Bed must be 3 or higher, found %d\n", bedN);
    if (bedN > 15)
	errAbort("Bed must be 15 or lower, found %d\n", bedN);
    }
else
    {
    /* NOTE(review): asText is only filled in further down (from asFile or
     * bedAsDef), so unless it is set elsewhere before main runs this check
     * cannot fire -- confirm whether asFile was intended. */
    if (asText)
	errAbort("If you specify the .as file, you must specify the -type as well so that\n"
	         "the number of standard BED columns is known.");
    }

/* If the haven't set bedN and bedP from the type var in the command line, then we sniff it
 * out from file. */
char *bedFileName = argv[1];
if (bedN == 0)
    {
    /* Just read in single line and count fields. */
    struct lineFile *lf = lineFileOpen(bedFileName, TRUE);
    char *line;
    if (!lineFileNextReal(lf, &line))
        errAbort("%s is empty", lf->fileName);
    int fieldCount;
    if (tabSep)
	fieldCount = chopByChar(line, '\t', NULL, 0); // Do not use chopString, see GOTCHA
    else
	fieldCount = chopByWhite(line, NULL, 0);
    if (fieldCount > 256)
        errAbort("Too many columns in %s, you sure it's a bed file?", lf->fileName);
    lineFileClose(&lf);

    /* Set up so that it looks like we are straight up bed for that many fields,
     * or if more than or maximum defined fields, then for bed15+ */
    bedN = fieldCount;
    if (bedN > bedKnownFields)
        {
	bedP = bedN - bedKnownFields;
	bedN = bedKnownFields;
	}
    }
   
/* Make sure that fields are defined, from bed spec if nowhere else. */
if (asFile)
    readInGulp(asFile, &asText, NULL);
else
    asText = bedAsDef(bedN,  bedN + bedP);

bedToBigBed(bedFileName, argv[2], argv[3]);
optionFree();
if (verboseLevel() > 1)
    printVmPeak();
return 0;
}
Beispiel #28
0
static void textHistogram(char *inFile)
/* textHistogram - Make a histogram in ascii.
 * Column col of inFile is binned into maxBinCount bins of width binSize
 * starting at minVal (binSizeR and minValR when real is set).  When aveCol
 * is set, the average of that column per bin is reported instead of the
 * count.  Values that fall outside the bins - below the minimum or at or
 * beyond the last bin - are counted together and reported on one extra
 * final row.  Output goes to stdout as an ascii bar graph, or as a table
 * when noStar is set. */
{
double *hist = NULL;
double *total = NULL;
char *row[256];
int wordCount;
struct lineFile *lf = lineFileOpen(inFile, TRUE);
int i,j;
int minData = maxBinCount, maxData = 0;
int totalTooBig = 0;
double maxCount = 0;
double maxCt;
double maxVal = 0;
int truncation = 0;
int begin, end;
unsigned long long totalCounts = 0;
double cpd;

/* Allocate histogram and optionally space for
 * second column totals. */
AllocArray(hist, maxBinCount);
if (aveCol >= 0)
    AllocArray(total, maxBinCount);

/* discard the first `skip` rows of input */
while (skip-- > 0)
    wordCount = lineFileChop(lf, row);

/* Go through each line of input file accumulating
 * data. */
while ((wordCount = lineFileChop(lf, row)))
    {
    int x;	/*	will become the index into hist[]	*/
    if (wordCount <= col || wordCount <= aveCol)
        errAbort("Not enough words line %d of %s", lf->lineIx, lf->fileName);
    x = -1;	/*	stays -1 for values below the minimum	*/
    if (real)	/*	for real data, work in real space to find index */
        {
        double d;
        d = lineFileNeedDouble(lf, row, col);
        if (d > maxVal)
            maxVal = d;
        if (d >= minValR)
            {
            d -= minValR;
            x = (int) floor(d / binSizeR);
            }
        }
    else
        {
        /* Keep the raw value apart from the index: a value below minVal
         * must not be used as a bin index itself, or it would be counted
         * in an unrelated bin. */
        int v = lineFileNeedNum(lf, row, col);
        if (v > maxVal)
            maxVal = v;
        if (v >= minVal)
            x = (v - minVal) / binSize;
        }
    /*	index x is calculated, accumulate it when in range	*/
    if (x >= 0 && x < maxBinCount)
        {
        hist[x] += 1;
        if (aveCol >= 0)
            {
            double a;
            a = lineFileNeedDouble(lf, row, aveCol);
            total[x] += a;
            }
        }
    else
        {
        verbose(2, "truncating index %d\n", x);
        truncation = (x > truncation) ? x : truncation;
        totalTooBig += 1;
        }
    }

lineFileClose(&lf);

if (truncation > 0)
    {
    if (real)
        fprintf(stderr,"large values truncated: need %d bins or larger binSize than %g\n",truncation, binSizeR);
    else
        fprintf(stderr,"large values truncated: need %d bins or larger binSize than %d\n",truncation, binSize);
    printf("Maximum value %f\n", maxVal);
    }

/* Figure out range that has data, maximum data
 * value and optionally compute averages. */
if (aveCol >= 0)
    {
    double ave, maxAve = -BIGNUM;
    for (i=0; i<maxBinCount; ++i)
        {
        int count = hist[i];
        if (count != 0)
            {
            ave = total[i]/count;
            if (maxAve < ave) maxAve = ave;
            if (minData > i) minData = i;
            if (maxData < i) maxData = i;
            }
        }
    maxCt = maxAve;
    }
else
    {
    for (i=0; i<maxBinCount; ++i)
        {
        int count = hist[i];
        if (count != 0)
            {
            if (maxCount < count) maxCount = count;
            if (minData > i) minData = i;
            if (maxData < i) maxData = i;
            }
        }
    maxCt = maxCount;
    }

/* normally only the span of bins holding data is shown; at verbose
 * level 2 and above every bin is shown */
begin = minData;
end = maxData + 1;
if (verboseLevel()>1)
    {
    begin = 0;
    end = maxBinCount;
    }

if (probValues || freq)
    {
    totalCounts = 0;
    for (i=begin; i<end; ++i)
        totalCounts += hist[i];
    verbose(2,"#\ttotal data values: %llu\n", totalCounts);
    if (totalCounts < 1)
        errAbort("ERROR: No bins with any data ?\n");
    }

if (freq)
    maxCt = maxCt/(double)totalCounts;
if (doLog)
    maxCt = log(maxCt);

if (verboseLevel()>1)
    {
    if (noStar)
        {
        if (probValues)
            printf("# bin\tValue\t\tprob-Value\t\tlog2(prob-Value)\tCPD\t1-CPD\n");
        else
            printf("# bin  Value\tascii graph\n");
        }
    else
        printf("# bin  Value\tascii graph\n");
    }

cpd = 0.0;	/*	cumulative probability distribution	*/
/* Output results.  The pass with i == end is the extra row for values that
 * fell outside the bins; it is skipped when there were none. */
for (i=begin; i<=end; ++i)
    {
    double ct;
    double binStartR = 0.0;
    int binStart = 0;
    long count;

    if (i != end)
        count = hist[i];
    else
        {
        if (totalTooBig == 0)
            break;
        count = totalTooBig;
        }
    if (real)
        binStartR = i*binSizeR + minValR;
    else
        binStart = i*binSize + minVal;

    if (aveCol >= 0)
        {
        /* The extra row has no total[] slot; reading total[end] ran off
         * the array whenever end == maxBinCount, so it reports 0. */
        if (i != end && count > 0)
            ct = total[i]/count;
        else
            ct = 0;
        }
    else if (freq)
        {
        ct = count/(double)totalCounts;
        }
    else
        {
        ct = count;
        }
    if (doLog)
        ct = log(ct);
    if (noStar)
        {
        if (i == end)
            printf("<minVal or >=");
        if (verboseLevel()>1)
            printf("%02d\t", i);
        if (real)
            {
            if (probValues)
                {
                if (verboseLevel()>1)
                    printf("%g:%g", binStartR, binStartR+binSizeR);
                else
                    printf("%3d %g:%g", i, binStartR, binStartR+binSizeR);
                }
            else
                printf("%3d %g:%g\t%f", i, binStartR, binStartR+binSizeR, ct);
            }
        else
            {
            printf("%d\t%f", binStart, ct);
            }
        if (probValues)
            {
            if (ct > 0)
                {
                cpd += (double)ct/(double)totalCounts;
                printf("\t%f\t%f\t%f\t%f\n", (double)ct/(double)totalCounts,
                    log((double)ct/(double)totalCounts)/log(2.0), cpd, 1.0-cpd);
                }
            else
                printf("\t0.0      \tN/A     \t%f\t%f\n", cpd, 1.0-cpd);
            }
        else
            printf("\n");
        }
    else
        {
        /* bar length is scaled so the largest value gets 60 stars */
        int astCount = round(ct * 60.0 / maxCt);
        if (i == end)
            printf("<minVal or >=");
        if (verboseLevel()>1)
            printf("%2d ", i);
        if (real)
            printf("%f ", binStartR);
        else
            printf("%3d ", binStart);
        for (j=0; j<astCount; ++j)
            putchar('*');
        if ((aveCol >= 0) || freq)
            printf(" %f\n", ct);
        else
            printf(" %ld\n", count);
        }
    }
}	/*	textHistogram()	*/
Beispiel #29
0
void checkGff(char *gff, struct hash *chromHash)
/* Check that CDS portions of GFF file have start
 * codons where they are supposed to.
 *   gff       - name of the GFF file; read gffRowCount tab-separated
 *               fields per line.
 *   chromHash - looked up by chromosome name; values are used as
 *               struct dnaSeq pointers.
 * Only rows whose third field starts with "CDS" and whose sequence
 * name starts with "chr" are examined ("chrMito" is looked up as
 * "chrM").  Processing stops at the first CDS row on "2-micron".
 * Aborts on size < 1, an unrecognized strand or an unknown chromosome.
 * Prints a summary line of good/bad/total counts when done. */
{
struct lineFile *lf = lineFileOpen(gff, TRUE);
char *row[10];
int cdsCount = 0, goodCount = 0, badCount = 0;

verbose(2,"#    scanning %d fields of gff file:\n#\t'%s'\n", gffRowCount, gff);
while (lineFileNextRowTab(lf, row, gffRowCount))
    {
    if (startsWith("CDS", row[2]))
        {
        /* GFF start is 1-based; convert to a 0-based offset. */
        int start = lineFileNeedNum(lf, row, 3) - 1;
        int end = lineFileNeedNum(lf, row, 4);
        int size = end-start;
        char strand = row[6][0];
        char chrom[64];
        struct dnaSeq *seq;
        char *startCodon;

        if (size < 1)
            {
            errAbort("start not before end line %d of %s",
                lf->lineIx, lf->fileName);
            }
        if (strand != '+' && strand != '-')
            {
            errAbort("Expecting strand got %s line %d of %s",
                row[6], lf->lineIx, lf->fileName);
            }
        if (startsWith("2-micron", row[0]))  // need to stop processing here
            break;
        if (!startsWith("chr", row[0]))
            continue;
        if (startsWith("chrMito", row[0]))	// change name to UCSC chrM
            safef(chrom, sizeof(chrom), "%s", "chrM");
        else
            safef(chrom, sizeof(chrom), "%s", row[0]);
        if ((seq = hashFindVal(chromHash, chrom)) == NULL)
            errAbort("Unknown chromosome %s line %d of %s",
                row[0], lf->lineIx, lf->fileName);
        if (start < 0)
            {
            /* A start of 0 or less would point before the sequence
             * buffer; report it and count it as bad instead. */
            printf("start (%d) less than 1 on %s line %d of %s\n",
                start + 1, chrom, lf->lineIx, lf->fileName);
            ++badCount;
            ++cdsCount;
            continue;
            }
        if (end > seq->size)
            {
            printf("end (%d) greater than %s size (%d) line %d of %s\n",
                end, chrom, seq->size, lf->lineIx, lf->fileName);
            ++badCount;
            ++cdsCount;	/* keep total consistent with good + bad */
            continue;
            }
        startCodon = seq->dna + start;
        /* Minus-strand features are reverse-complemented in place so the
         * codon reads 5'->3'; the same call below restores the sequence. */
        if (strand == '-')
            reverseComplement(startCodon, size);
        if (!startsWith("ATG", startCodon))
            {
            char *s = startCodon;
            verbose(3,"# not ATG: %s:%d-%d\t%c%c%c\t%c\n",
                chrom, start, end, s[0], s[1], s[2], strand);
            ++badCount;
            }
        else
            ++goodCount;
        if (verboseLevel()>=4)
            {
            char *s = startCodon;
            if (gffRowCount > 9)
                printf("%s\t%d\t%c%c%c\t%c\t%s\n", chrom, start, s[0], s[1], s[2], strand, row[9]);
            else
                printf("%s\t%d\t%c%c%c\t%c\t%s\n", chrom, start, s[0], s[1], s[2], strand, row[8]);
            }
        if (strand == '-')
            reverseComplement(startCodon, size);
        ++cdsCount;
        }
    }
lineFileClose(&lf);
printf("#    good %d, bad %d, total %d\n", goodCount, badCount, cdsCount);
}
Beispiel #30
0
char *htmlFormCgiVars(struct htmlPage *page, struct htmlForm *form, 
	char *buttonName, char *buttonVal, struct dyString *dyHeader)
/* Return cgi vars in name=val format from use having pressed
 * submit button of given name and value.
 *   page       - used only to supply page->forms when form is NULL.
 *   form       - form whose variables are encoded; NULL means page->forms.
 *   buttonName - name of the pressed submit button, may be NULL.
 *   buttonVal  - value sent for that button.
 *   dyHeader   - for MIME-encoded forms, receives the Content-type
 *                header line carrying the chosen boundary.
 * For MIME-encoded forms the body is multipart, delimited by a randomly
 * generated boundary; if the boundary turns out to occur in the data a
 * new one is tried.  The result is the cannibalized dyString buffer. */
{
struct dyString *dy = newDyString(0);
struct htmlFormVar *var;
boolean isMime;
int mimeParts;
char boundary[256];

/* Resolve the default form first so the encoding check looks at the
 * form that is actually going to be used. */
if (form == NULL)
    form = page->forms;
isMime = isMimeEncoded(form);

/* Seed once per call, outside the retry loop: re-seeding with the same
 * second on every retry would regenerate the identical boundary. */
if (isMime)
    srand( (unsigned)time( NULL ) );

while(TRUE)
    {
    /* Count parts afresh on every attempt; a stale count from a failed
     * attempt could never match and the loop would not terminate. */
    mimeParts = 0;
    if (isMime)
        {
        /* choose a new string for the boundary */
        int i = 0;
        safef(boundary,sizeof(boundary),"%s", "---------");
        for(i=strlen(boundary);i<41;++i)
            {
            int r = (int) 26 * (rand() / (RAND_MAX + 1.0));
            boundary[i] = r+'A';
            }
        boundary[i] = 0;
        }

    if (buttonName != NULL && !isMime)
        appendCgiVar(dy, buttonName, buttonVal);
    for (var = form->vars; var != NULL; var = var->next)
        {
        if (sameWord(var->tagName, "SELECT") || 
            sameWord(var->tagName, "TEXTAREA") || 
            (var->type != NULL &&
            ((sameWord(var->type, "RADIO") || sameWord(var->type, "TEXTBOX")
            || sameWord(var->type, "PASSWORD") || sameWord(var->type, "HIDDEN")
            || sameWord(var->type, "TEXT") || sameWord(var->type, "FILE")))))
            {
            /* Value-carrying controls are always sent, empty if unset. */
            char *val = var->curVal;
            if (val == NULL)
                val = "";
            if (isMime)
                {
                ++mimeParts;
                appendMimeVar(dy, var->name, val, var->type, boundary);
                }
            else
                appendCgiVar(dy, var->name, val);
            }
        else if (var->type != NULL && sameWord(var->type, "CHECKBOX"))
            {
            /* Checkboxes are only sent when they have a current value. */
            if (var->curVal != NULL)
                {
                if (isMime)
                    {
                    ++mimeParts;
                    appendMimeVar(dy, var->name, var->curVal, var->type, boundary);
                    }
                else
                    appendCgiVar(dy, var->name, var->curVal);
                }
            }
        else if (isMime && buttonName && sameWord(buttonName,var->name))
            {
            /* In MIME mode the pressed button is emitted in form order. */
            ++mimeParts;
            appendMimeVar(dy, buttonName, buttonVal, NULL, boundary);
            }
        }
    if (isMime) 
        {
        ++mimeParts;	/* the terminus contributes one more boundary */
        appendMimeTerminus(dy,boundary);
        if (countOccurrences(boundary,strlen(boundary),dy->string,dy->stringSize) != mimeParts)
            { /* boundary was found in input! # occurrences not as expected */
            dyStringClear(dy);
            continue;  /* if at first you don't succeed, try another boundary string */
            }
        dyStringPrintf(dyHeader, "Content-type: multipart/form-data, boundary=%s\r\n",boundary);
        if (verboseLevel() == 2)
            {
            mustWrite(stderr, dyHeader->string, dyHeader->stringSize);
            mustWrite(stderr, dy->string, dy->stringSize);
            }
        }
    break;
    }

return dyStringCannibalize(&dy);

}