コード例 #1
0
void saveDataTable(struct expData *data)
/* Create the expression table the cheesey way by loading a temp tab file. */
{
FILE *f = hgCreateTabFile(".", table);
struct expData *cur;
struct sqlConnection *conn = sqlConnect(database);
expDataCreateTable(conn, table);
for (cur = data; cur != NULL; cur = cur->next)
    expDataTabOut(cur, f);
hgLoadTabFile(conn, ".", table, &f);
hgRemoveTabFile(".", table);
sqlDisconnect(&conn);
}
コード例 #2
0
ファイル: hgGnfMicroarray.c プロジェクト: elmargb/kentUtils
int lineToExpTable(char *line, char *table)
/* Create expression format table from line. */
{
FILE *f = hgCreateTabFile(tabDir, table);
int count = lineToExp(line, f);
if (doLoad)
    {
    struct sqlConnection *conn = sqlConnect(database);
    expRecordCreateTable(conn, table);
    hgLoadTabFile(conn, tabDir, table, &f);
    hgRemoveTabFile(tabDir, table);
    sqlDisconnect(&conn);
    }
return count;
}
コード例 #3
0
ファイル: hgLoadGenePred.c プロジェクト: blumroy/kentUtils
void hgLoadGenePred(char *db, char *table, int numGenePreds, char **genePredFiles)
/* hgLoadGenePred - Load up a mySQL database genePred table. */
{
struct genePred *genes = loadGenes(numGenePreds, genePredFiles);
struct sqlConnection *conn = sqlConnect(db);
char *tmpDir = ".";
FILE *tabFh = hgCreateTabFile(tmpDir, table);

mkTabFile(db, genes, tabFh);
genePredFreeList(&genes);
setupTable(db, conn, table);
hgLoadTabFile(conn, tmpDir, table, &tabFh);
sqlDisconnect(&conn);
hgRemoveTabFile(tmpDir, table);
}
コード例 #4
0
void makeNewExpTable(char *oldTable, struct maMedSpec *medList, char *newTable)
/* Create new expTable in hgFixed that is very similar
 * to oldExpTable, but with rows defined by medList. */
{
struct maMedSpec *med;
struct expRecord *oldExp, newExp;
struct sqlConnection *conn = sqlConnect("hgFixed");
FILE *f = hgCreateTabFile(tabDir, newTable);
char query[256], **row;
struct sqlResult *sr;
int curId = 0;

for (med = medList; med != NULL; med = med->next)
    {
    /* Load expression record from old table of first
     * thing in median. */
    sqlSafef(query, sizeof(query),
    	"select * from %s where id = %d", oldTable, med->ids[0]);
    sr = sqlGetResult(conn, query);
    if ((row = sqlNextRow(sr)) == NULL)
        errAbort("Can't find id %d in %s\n", med->ids[0], oldTable);
    oldExp = expRecordLoad(row);
    sqlFreeResult(&sr);
    if (oldExp->numExtras < 3)
        errAbort("Can only deal with old tables with 3 extras or more");


    /* Create new expression record, mostly just a shallow copy of old. */
    newExp = *oldExp;
    newExp.id = curId;
    ++curId;
    newExp.name = newExp.description = med->name;
    newExp.extras[2] = med->group;

    /* Save new one, free old one. */
    expRecordTabOut(&newExp, f);
    expRecordFree(&oldExp);
    }

if (doLoad)
    {
    expRecordCreateTable(conn, newTable);
    hgLoadTabFile(conn, tabDir, newTable, &f);
    hgRemoveTabFile(tabDir, newTable);
    }
sqlDisconnect(&conn);
}
コード例 #5
0
ファイル: hgRatioMicroarray.c プロジェクト: elmargb/kentUtils
void hgRatioMicroarray(char *absTable, char *relTable)
/* hgRatioMicroarray - Create a ratio form of microarray. */
{
struct maMedSpec *clumpList = NULL;
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char **row;
char query[512];
struct expData *ex;
struct expData *expList = NULL;
FILE *f = hgCreateTabFile(tabDir, relTable);
int rowCount = 0;

if (clump != NULL)
    clumpList = maMedSpecReadAll(clump);

sqlSafef(query, sizeof(query),
	"select * from %s", absTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    ex = expDataLoad(row);
    slAddHead(&expList, ex);
    if (limit != 0 && rowCount >= limit)
        break;
    }
sqlFreeResult(&sr);
slReverse(&expList);
maExpDataClipMin(expList, minAbsVal, minAbsVal * 0.5);
maExpDataAddConstant(expList, c);
if (transpose)
    maExpDataDoLogRatioTranspose(expList, doAverage);
else
    maExpDataDoLogRatioGivenMedSpec(expList, clumpList, (doAverage) ? useMean : useMedian);
for (ex = expList; ex != NULL; ex = ex->next)
    expDataTabOut(ex, f);
if (doLoad)
    {
    expDataCreateTable(conn, relTable);
    hgLoadTabFile(conn, tabDir, relTable, &f);
    hgRemoveTabFile(tabDir, relTable);
    }
expDataFreeList(&expList);
sqlDisconnect(&conn);
}
コード例 #6
0
struct hash *loadModuleToMotif(struct sqlConnection *conn, char *fileName, 
	char *table)
/* Load up file which has a line per module.  The first word is the module
 * number, the rest of the tab-separated fields are motif names. 
 * Return hash keyed by module&motif. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line, *module, *motif;
FILE *f = hgCreateTabFile(tmpDir, table);
struct dyString *dy = dyStringNew(512);
int motifCount = 0, moduleCount = 0;
struct hash *hash = newHash(18);

while (lineFileNextReal(lf, &line))
    {
    ++moduleCount;
    subChar(line, ' ', '_');
    module = nextWord(&line);
    while ((motif = nextWord(&line)) != NULL)
	{
	++motifCount;
        fprintf(f, "%s\t%s\n", module, motif);
	hashAdd2(hash, module, motif, NULL);
	}
    }
dyStringPrintf(dy,
"CREATE TABLE  %s (\n"
"    module int not null,\n"
"    motif varchar(255) not null,\n"
"              #Indices\n"
"    INDEX(module),\n"
"    INDEX(motif(16))\n"
")\n",  table);
sqlRemakeTable(conn, table, dy->string);
verbose(1, "%d modules, %d motifs in modules\n",
	moduleCount, motifCount);
hgLoadTabFile(conn, tmpDir, table, &f);
hgRemoveTabFile(tmpDir, table);
verbose(1, "Loaded %s table\n", table);
lineFileClose(&lf);
return hash;
}
コード例 #7
0
void makeNewDataTable(char *database, char *oldTable, struct maMedSpec *medList, char *newTable)
/* Create new table in database based on medians of data
 * in old table as defined by medList. */
{
struct sqlConnection *conn = sqlConnect(database);
FILE *f = hgCreateTabFile(tabDir, newTable);
struct expData *expList, *medianExpList, *exp;

expList = expDataLoadTableLimit(conn, oldTable, limit);
medianExpList = maExpDataMedianFromSpec(expList, medList, minExps);
for (exp = medianExpList; exp != NULL; exp = exp->next)
    expDataTabOut(exp, f);
if (doLoad)
    {
    expDataCreateTable(conn, newTable);
    hgLoadTabFile(conn, tabDir, newTable, &f);
    hgRemoveTabFile(tabDir, newTable);
    }
expDataFreeList(&expList);
expDataFreeList(&medianExpList);
sqlDisconnect(&conn);
}
コード例 #8
0
ファイル: hgGnfMicroarray.c プロジェクト: elmargb/kentUtils
void hgGnfMicroarray(char *expTable, char *dataTable, char *atlasFile)
/** Main function that does all the work for new-style*/
{
struct lineFile *lf = lineFileOpen(atlasFile, TRUE);
char *line;
int i, wordCount, expCount;
char **row;
float *data;
char *affyId;
struct hash *hash = newHash(17);
FILE *f = NULL;
int dataCount = 0;

/* Open Atlas file and use first line to create experiment table. */
if (!lineFileNextReal(lf, &line))
    errAbort("%s is empty", lf->fileName);
if (startsWith("Affy", line))
    line += 4;
if (startsWith("Gene Name", line))
    line += 9;
if (line[0] != '\t')
    errAbort("%s doesn't seem to be a new format atlas file", lf->fileName);
expCount = lineToExpTable(line+1, expTable);
if (expCount <= 0)
    errAbort("No experiments in %s it seems", lf->fileName);
warn("%d experiments\n", expCount);

f = hgCreateTabFile(tabDir, dataTable);

AllocArray(row, expCount);
AllocArray(data, expCount);
while (lineFileNextReal(lf, &line))
    {
    affyId = nextWord(&line);
    wordCount = chopByWhite(line, row, expCount);
    if (wordCount != expCount)
        errAbort("Expecting %d data points, got %d line %d of %s", 
		expCount, wordCount, lf->lineIx, lf->fileName);
    if (chopName != NULL)
        {
	char *e = stringIn(chopName, affyId);
	if (e != NULL)
	    *e = 0;
	}
    if (hashLookup(hash, affyId))
	{
        warn("Duplicate %s, skipping all but first.", affyId);
	continue;
	}
    for (i=0; i<expCount; ++i)
        {
        data[i] = sqlFloat(row[i]);
        }
    shortDataOut(f, affyId, expCount, data);
    ++dataCount;
    if (limit != 0 && dataCount >= limit)
        break;
    }
lineFileClose(&lf);

if (doLoad)
    {
    struct sqlConnection *conn = sqlConnect(database);
    expDataCreateTable(conn, dataTable);
    hgLoadTabFile(conn, tabDir, dataTable, &f);
    hgRemoveTabFile(tabDir, dataTable);
    sqlDisconnect(&conn);
    }
}
コード例 #9
0
ファイル: hgLoadRnaFold.c プロジェクト: elmargb/kentUtils
void hgLoadRnaFold(char *database, char *table, char *foldDir)
/* hgLoadRnaFold - Load a directory full of RNA fold files into database. */
{
char path[PATH_LEN];
struct slName *dirList, *dirEl;
struct lineFile *lf;
char *line, *word, *s, c;
FILE *f = hgCreateTabFile(tabDir, table);
int count = 0;

dirList = listDir(foldDir, "*");
for (dirEl = dirList; dirEl != NULL; dirEl = dirEl->next)
    {
    char *name = dirEl->name;
    if (sameString(name, "CVS"))
        continue;
    safef(path, sizeof(path), "%s/%s", foldDir, name);
    lf = lineFileOpen(path, TRUE);
    if (!lineFileNext(lf, &line, NULL))
        {
	if (warnEmpty)
	    {
	    warn("%s is empty, skipping\n", name);
	    lineFileClose(&lf);
	    continue;
	    }
	else
	    errAbort("%s is empty\n", name);
	}
    if (!isupper(line[0]))
	notFold(path, 1);
    fprintf(f, "%s\t", name);	/* Save name */
    fprintf(f, "%s\t", line);	/* Save sequence */
    lineFileNeedNext(lf, &line, NULL);
    c = line[0];
    if (c != '.' && c != '(')
        notFold(path, 2);
    word = nextWord(&line);
    fprintf(f, "%s\t", word);	/* Save nested parenthesis */

    /* Parse out (energy) term at end of line. */
    s = strchr(line, '(');
    if (s == NULL)
        notFold(path, 3);
    word = skipLeadingSpaces(s+1);
    if (word == NULL || (!word[0] == '-' && !isdigit(word[0])))
        notFold(path, 4);
    if ((s = strchr(word, ')')) == NULL)
        notFold(path, 5);
    *s = 0;
    fprintf(f, "%s\n", word);
    
    lineFileClose(&lf);
    ++count;
    }
printf("Parsed %d files\n", count);
if (doLoad)
    {
    struct sqlConnection *conn = sqlConnect(database);
    rnaFoldCreateTable(conn, table);
    hgLoadTabFile(conn, tabDir, table, &f);
    hgRemoveTabFile(tabDir, table);
    sqlDisconnect(&conn);
    }
}
コード例 #10
0
struct hash *loadMotifWeights(struct sqlConnection *conn, char *fileName, 
	char *table)
/* Load in XML weight motif file and save it in tab-separated format
 * and in hash keyed by motif name. */
{
struct esmMotifs *motifs = esmMotifsLoad(fileName);
struct esmMotif *motif;
FILE *f = hgCreateTabFile(tmpDir, table);
struct dyString *dy = dyStringNew(512);
struct hash *hash = newHash(16);

for (motif = motifs->esmMotif; motif != NULL; motif = motif->next)
    {
    struct esmWeights *weights = motif->esmWeights;
    int posCount = slCount(weights->esmPosition);
    struct esmPosition *pos;
    struct dnaMotif *dm;
    char name[64];


    fixMotifName(motif->Name, name, sizeof(name));
    AllocVar(dm);
    dm->name = cloneString(name);
    dm->columnCount = posCount;
    AllocArray(dm->aProb, posCount);
    AllocArray(dm->cProb, posCount);
    AllocArray(dm->gProb, posCount);
    AllocArray(dm->tProb, posCount);
    for (pos = weights->esmPosition; pos != NULL; pos = pos->next)
        {
	char *row[5];
	double odds[4], sumOdds = 0;
	int i;

	int ix = pos->Num;
	int rowSize = chopString(pos->Weights, ";", row, ArraySize(row));
	if (rowSize != 4)
	    errAbort("Expecting 4 values for weights in position %d of Motif %s",
               pos->Num, motif->Name);
	if (ix >= posCount)
	    errAbort("Num %d out of range in Motif %s", ix, motif->Name);
	for (i=0; i<4; ++i)
	    {
	    odds[i] = exp(atof(row[0]));
	    sumOdds += odds[i];
	    }
	dm->aProb[ix] = odds[0]/sumOdds;
	dm->cProb[ix] = odds[1]/sumOdds;
	dm->gProb[ix] = odds[2]/sumOdds;
	dm->tProb[ix] = odds[3]/sumOdds;
	}
    dnaMotifTabOut(dm, f);
    hashAdd(hash, dm->name, dm);
    }
dyStringPrintf(dy,
"CREATE TABLE %s (\n"
"    name varchar(16) not null,	# Motif name.\n"
"    columnCount int not null,	# Count of columns in motif.\n"
"    aProb longblob not null,	# Probability of A's in each column.\n"
"    cProb longblob not null,	# Probability of C's in each column.\n"
"    gProb longblob not null,	# Probability of G's in each column.\n"
"    tProb longblob not null,	# Probability of T's in each column.\n"
"              #Indices\n"
"    PRIMARY KEY(name)\n"
")\n", table);
sqlRemakeTable(conn, table, dy->string);
hgLoadTabFile(conn, tmpDir, table, &f);
hgRemoveTabFile(tmpDir, table);
verbose(1, "Processed %d motifs into %s\n", slCount(motifs->esmMotif), table);
return hash;
}
コード例 #11
0
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable)
/* hgExpDistance - Create table that measures expression distance between pairs. */
{
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char query[256];
char **row;
struct hash *expHash = hashNew(16);
int realExpCount = -1;
struct microData *gene;
int rc, t;
pthread_t *threads = NULL;
pthread_attr_t attr;
int *threadID = NULL;
void *status;
char *tempDir = ".";
int arrayNum; 
struct microDataDistance *geneDistPtr = NULL;	
struct microDataDistance *geneDistArray = NULL;	
int geneIx;
FILE *f = NULL;

/* Get list/hash of all items with expression values. */
safef(query, sizeof(query), "select name,expCount,expScores from %s", posTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *name = row[0];
    if (!hashLookup(expHash, name))
	{
	int expCount = sqlUnsigned(row[1]);
	int commaCount;
	float *expScores = NULL;

	sqlFloatDynamicArray(row[2], &expScores, &commaCount);
	if (expCount != commaCount)
	    errAbort("expCount and expScores don't match on %s in %s", name, posTable);
	if (realExpCount == -1)
	    realExpCount = expCount;
	if (expCount != realExpCount)
	    errAbort("In %s some rows have %d experiments others %d", 
	    	name, expCount, realExpCount);
	AllocVar(gene);
	gene->expCount = expCount;
	gene->expScores = expScores;
	hashAddSaveName(expHash, name, gene, &gene->name);
	slAddHead(&geneList, gene);
	}
    }
sqlFreeResult(&sr);
conn = sqlConnect(database);
slReverse(&geneList);
geneCount = slCount(geneList);
printf("Have %d elements in %s\n", geneCount, posTable);

weights = getWeights(realExpCount);

if (optionExists("lookup"))
    geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList);
geneCount = slCount(geneList);
printf("Got %d unique elements in %s\n", geneCount, posTable);

sqlDisconnect(&conn);	/* Disconnect because next step is slow. */


if (geneCount < 1)
    errAbort("ERROR: unique gene count less than one ?");

f = hgCreateTabFile(tempDir, outTable);
synQ = synQueueNew();

/* instantiate threads */
AllocArray( threadID, numThreads );
AllocArray( threads, numThreads );
pthread_attr_init( &attr );
pthread_mutex_init( &mutexDotOut, NULL );
pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );

for (t = 0; t < numThreads; t++) {
	threadID[t] = t;
	rc = pthread_create( &threads[t], &attr, computeDistance, 
						(void *) &threadID[t]);
	if (rc)
		errAbort("ERROR: in pthread_create() %d\n", rc );
} 

/* this thread will write to the file from the queue */
for (arrayNum = 0; arrayNum < geneCount; arrayNum++) {
	geneDistArray = (struct microDataDistance *)synQueueGet( synQ );
	geneDistPtr = geneDistArray;
    	/* Print out closest GENEDISTS distances in tab file. */
    	for (geneIx=0; geneIx < GENEDISTS && geneIx < geneCount; 
						++geneIx, geneDistPtr++)
		if (geneDistPtr != NULL)
			fprintf(f, "%s\t%s\t%f\n", geneDistPtr->name1, 
				geneDistPtr->name2, geneDistPtr->distance);
		else
			errAbort("ERROR: writing distance %d to file\n", 
							geneIx);
	freeMem( geneDistArray );
}

/* synchronize all threads */
for (t = 0; t < numThreads; t++) {
	rc = pthread_join( threads[t], &status);
	if (rc)
		errAbort("ERROR: in pthread_join() %d\n", rc );
} 

printf("Made %s.tab\n", outTable);

slFreeList( &geneList );

pthread_mutex_destroy( &mutexDotOut );
pthread_attr_destroy( &attr );

/* Create and load table. */
conn = sqlConnect(database);
distanceTableCreate(conn, outTable);
hgLoadTabFile(conn, tempDir, outTable, &f);
printf("Loaded %s\n", outTable);

/* Add indices. */
safef(query, sizeof(query), "alter table %s add index(query(12))", outTable);
sqlUpdate(conn, query);
printf("Made query index\n");
if (optionExists("targetIndex"))
    {
    safef(query, sizeof(query), "alter table %s add index(target(12))", outTable);
    sqlUpdate(conn, query);
    printf("Made target index\n");
    }

hgRemoveTabFile(tempDir, outTable);
}
コード例 #12
0
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable)
/* hgExpDistance - Create table that measures expression distance between pairs. */
{
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char query[256];
char **row;
struct hash *expHash = hashNew(16);
int realExpCount = -1;
struct microData *gene;
int rc, t;
pthread_t *threads = NULL;
pthread_attr_t attr;
int *threadID = NULL;
void *status;
char *tempDir = ".";
long time1, time2;

time1 = clock1000();

/* Get list/hash of all items with expression values. */
sqlSafef(query, sizeof(query), "select name,expCount,expScores from %s", posTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *name = row[0];
    if (!hashLookup(expHash, name))
	{
	int expCount = sqlUnsigned(row[1]);
	int commaCount;
	float *expScores = NULL;

	sqlFloatDynamicArray(row[2], &expScores, &commaCount);
	if (expCount != commaCount)
	    errAbort("expCount and expScores don't match on %s in %s", name, posTable);
	if (realExpCount == -1)
	    realExpCount = expCount;
	if (expCount != realExpCount)
	    errAbort("In %s some rows have %d experiments others %d", 
	    	name, expCount, realExpCount);
	AllocVar(gene);
	gene->expCount = expCount;
	gene->expScores = expScores;
	hashAddSaveName(expHash, name, gene, &gene->name);
	slAddHead(&geneList, gene);
	}
    }
sqlFreeResult(&sr);
conn = sqlConnect(database);
slReverse(&geneList);
geneCount = slCount(geneList);
printf("Have %d elements in %s\n", geneCount, posTable);

weights = getWeights(realExpCount);

if (optionExists("lookup"))
    geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList);
geneCount = slCount(geneList);
printf("Got %d unique elements in %s\n", geneCount, posTable);

sqlDisconnect(&conn);	/* Disconnect because next step is slow. */


if (geneCount < 1)
    errAbort("ERROR: unique gene count less than one ?");

time2 = clock1000();
verbose(2, "records read time: %.2f seconds\n", (time2 - time1) / 1000.0);

f = hgCreateTabFile(tempDir, outTable);

/* instantiate threads */
AllocArray( threadID, numThreads );
AllocArray( threads, numThreads );
pthread_attr_init( &attr );
pthread_mutex_init( &mutexfilehandle, NULL );
pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );

for (t = 0; t < numThreads; t++) {
	threadID[t] = t;
	rc = pthread_create( &threads[t], &attr, computeDistance, 
						(void *) &threadID[t]);
	if (rc)
		errAbort("ERROR: in pthread_create() %d\n", rc );
} 

/* synchronize all threads */
for (t = 0; t < numThreads; t++) {
	rc = pthread_join( threads[t], &status);
	if (rc)
		errAbort("ERROR: in pthread_join() %d\n", rc );
} 

printf("Made %s.tab\n", outTable);

slFreeList( &geneList );

pthread_mutex_destroy( &mutexfilehandle );
pthread_attr_destroy( &attr );

time1 = time2;
time2 = clock1000();
verbose(2, "distance computation time: %.2f seconds\n", (time2 - time1) / 1000.0);

/* Create and load table. */
conn = sqlConnect(database);
distanceTableCreate(conn, outTable);
hgLoadTabFile(conn, tempDir, outTable, &f);
printf("Loaded %s\n", outTable);

/* Add indices. */
sqlSafef(query, sizeof(query), "alter table %s add index(query(12))", outTable);
sqlUpdate(conn, query);
printf("Made query index\n");
if (optionExists("targetIndex"))
    {
    sqlSafef(query, sizeof(query), "alter table %s add index(target(12))", outTable);
    sqlUpdate(conn, query);
    printf("Made target index\n");
    }

hgRemoveTabFile(tempDir, outTable);

time1 = time2;
time2 = clock1000();
verbose(2, "table create/load/index time: %.2f seconds\n", (time2 - time1) / 1000.0);

}
コード例 #13
0
ファイル: hgKnownToSuper.c プロジェクト: sktu/kentUtils
void hgKnownToSuper(char *database, char *org, char *assFile)
/* hgKnownToSuper - Load knownToSuperfamily table. */
{
    struct sqlConnection *conn = sqlConnect(database);
    struct hash *pepToKnown = ensPepToKnown(conn, TRUE);
    char *table = "knownToSuper";
    FILE *f = hgCreateTabFile(tempDir, table);
    struct lineFile *lf = lineFileOpen(assFile, TRUE);
    boolean gotOrg = FALSE;
    int outCount = 0;
    char *row[6];

    while (lineFileRow(lf, row))
    {
        if (sameString(row[0], org))
        {
            char *pepName = row[1];
            char *regions = row[3];
            char *eVal = row[4];
            char *supId = row[5];
            char *knownId = hashFindVal(pepToKnown, pepName);
            if (knownId != NULL)
            {
                char *region, *e;
                int start,end;
                /* Loop through comma-separated region string. */
                for (region = regions; region != NULL; region = e)
                {
                    e = strchr(region, ',');
                    if (e != NULL)
                    {
                        *e++ = 0;
                        if (e[0] == 0)
                            e = NULL;
                    }
                    if (sscanf(region, "%d-%d", &start, &end) < 2)
                        errAbort("bad region %s line %d of %s", region,
                                 lf->lineIx, lf->fileName);
                    fprintf(f, "%s\t%s\t%d\t%d\t%s\n",
                            knownId, supId, start-1, end, eVal);
                    ++outCount;
                }
            }
            gotOrg = TRUE;
        }
    }
    lineFileClose(&lf);
    if (!gotOrg)
        errAbort("Looks like '%s' is not a recognized organism", org);
    if (outCount <= 0)
        errAbort("No good records found in %s", assFile);
    printf("%d records output\n", outCount);

    /* Refresh connection in case things took a while. */
    sqlDisconnect(&conn);
    conn = sqlConnect(database);

    /* Load up database. */
    createTable(conn, table);
    hgLoadTabFile(conn, tempDir, table, &f);
    hgRemoveTabFile(tempDir, table);
}
コード例 #14
0
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph. */
{
    double minVal,maxVal;
    struct chromGraph *el, *list;
    FILE *f;
    char *tempDir = ".";
    char path[PATH_LEN], gbdbPath[PATH_LEN];
    char *idTable = optionVal("idTable", NULL);
    char *pathPrefix = NULL;

    if (idTable == NULL)
        list = chromGraphLoadAll(fileName);
    else
        list = chromGraphListWithTable(fileName, db, idTable);
    if (list == NULL)
        errAbort("%s is empty", fileName);

    /* Figure out min/max values */
    minVal = maxVal = list->val;
    for (el = list->next; el != NULL; el = el->next)
    {
        if (optionExists("minusLog10"))
        {
            if (el->val == 1)
                el->val = 0;
            else if (el->val > 0)
                el->val = -1 * log(el->val)/log(10);
        }
        if (el->val < minVal)
            minVal = el->val;
        if (el->val > maxVal)
            maxVal = el->val;
    }


    /* Sort and write out temp file. */
    slSort(&list, chromGraphCmp);
    f = hgCreateTabFile(tempDir, track);
    for (el = list; el != NULL; el = el->next)
        chromGraphTabOut(el, f);

    if (doLoad)
    {
        struct dyString *dy = dyStringNew(0);
        struct sqlConnection *conn;

        /* Set up connection to database and create main table. */
        conn = hAllocConn(db);
        sqlDyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
        sqlRemakeTable(conn, track, dy->string);

        /* Load main table and clean up file handle. */
        hgLoadTabFile(conn, tempDir, track, &f);
        hgRemoveTabFile(tempDir, track);

        /* If need be create meta table.  If need be delete old row. */
        if (!sqlTableExists(conn, "metaChromGraph"))
            sqlUpdate(conn, metaCreateString);
        else
        {
            dyStringClear(dy);
            sqlDyStringPrintf(dy, "delete from metaChromGraph where name = '%s'",
                              track);
            sqlUpdate(conn, dy->string);
        }

        /* Make chrom graph file */
        safef(path, sizeof(path), "%s.cgb", track);
        chromGraphToBin(list, path);
        safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
        pathPrefix = optionVal("pathPrefix", path);
        safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

        /* Create new line in meta table */
        dyStringClear(dy);
        sqlDyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
                          track, minVal, maxVal, gbdbPath);
        sqlUpdate(conn, dy->string);
    }
}
コード例 #15
0
void hgLoadNetDist(char *inTab, char *db, char *outTable)
{
char *tempDir = ".";
FILE *f = hgCreateTabFile(tempDir, outTable);

struct sqlConnection *hConn = sqlConnect(db);

FILE *missingFile=NULL;
int missingCount=0;

struct lineFile *lf=NULL;
char *row[3];
int rowCount=3;

if (sqlRemap)
    {
    fetchRemapInfo(db);
    missingHash = newHash(16);  
    missingFile = mustOpen("missing.tab","w");
    }

/* read edges from file */


lf=lineFileOpen(inTab, TRUE);

/* print final values, remapping if needed */

while (lineFileNextRowTab(lf, row, rowCount))
    {
    char *geneI = row[0];
    char *geneJ = row[1];
    char *dij = row[2];
    char *gi=NULL, *gj=NULL;
    if (sqlRemap)
	{ /* it is possible for each id to have multiple remap values in hash */
	struct hashEl *hi=NULL, *hj=NULL, *hjSave=NULL;
	hi = hashLookup(aliasHash,geneI);
	hj = hashLookup(aliasHash,geneJ);
	missingCount += handleMissing(hi, geneI, missingHash, missingFile);
	missingCount += handleMissing(hj, geneJ, missingHash, missingFile);
	hjSave = hj;
	/* do all combinations of i and j */	
	for(;hi;hi=hashLookupNext(hi))
	    {
	    gi = (char *)hi->val;
	    for(;hj;hj=hashLookupNext(hj))
		{
		gj = (char *)hj->val;
		fprintf(f,"%s\t%s\t%s\n",gi,gj,dij);
		}
	    hj = hjSave; /* reset it */
	    }
	}
    else
	{
	gi=geneI;
	gj=geneJ;
	fprintf(f,"%s\t%s\t%s\n",gi,gj,dij);
	}
    }

lineFileClose(&lf);
carefulClose(&f);    

if (sqlRemap)
    {
    carefulClose(&missingFile);
    if (missingCount == 0)
	unlink("missing.tab");
    else	    
    	printf("hgLoadNetDist %d id-remapping misses, see missing.tab\n", missingCount);
    }

createTable(hConn, outTable);
hgLoadTabFile(hConn, tempDir, outTable, &f);
hgRemoveTabFile(tempDir, outTable);
}
コード例 #16
0
void knownToVisiGene(char *database)
/* knownToVisiGene - Create knownToVisiGene table by riffling through various other knownTo tables. */
{
char *tempDir = ".";
FILE *f = hgCreateTabFile(tempDir, outTable);
struct sqlConnection *hConn = sqlConnect(database);
struct sqlConnection *iConn = sqlConnect(visiDb);
struct sqlResult *sr;
char **row;
struct hash *geneImageHash = newHash(18);
struct hash *locusLinkImageHash = newHash(18);
struct hash *refSeqImageHash = newHash(18);
struct hash *genbankImageHash = newHash(18);
struct hash *probeImageHash = newHash(18);
struct hash *knownToLocusLinkHash = newHash(18);
struct hash *knownToRefSeqHash = newHash(18);
struct hash *knownToGeneHash = newHash(18);
struct hash *favorHugoHash = newHash(18);
struct hash *knownToProbeHash = newHash(18);
struct hash *knownToAllProbeHash = newHash(18);
struct genePred *knownList = NULL, *known;
struct hash *dupeHash = newHash(17);


probesDb  = optionVal("probesDb", database);
struct sqlConnection *probesConn = sqlConnect(probesDb);
vgProbes = sqlTableExists(probesConn,"vgProbes");
vgAllProbes = sqlTableExists(probesConn,"vgAllProbes");

/* Go through and make up hashes of images keyed by various fields. */
sr = sqlGetResult(iConn,
        NOSQLINJ "select image.id,imageFile.priority,gene.name,gene.locusLink,gene.refSeq,gene.genbank"
	",probe.id,submissionSet.privateUser,vgPrbMap.vgPrb,gene.id"
	" from image,imageFile,imageProbe,probe,gene,submissionSet,vgPrbMap"
	" where image.imageFile = imageFile.id"
	" and image.id = imageProbe.image"
	" and imageProbe.probe = probe.id"
	" and probe.gene = gene.id"
	" and image.submissionSet=submissionSet.id"
	" and vgPrbMap.probe = probe.id");

while ((row = sqlNextRow(sr)) != NULL)
    {
    int id = sqlUnsigned(row[0]);
    float priority = atof(row[1]);
    int privateUser = sqlSigned(row[7]);
    char vgPrb_Id[256];
    safef(vgPrb_Id, sizeof(vgPrb_Id), "vgPrb_%s",row[8]);
    int geneId = sqlUnsigned(row[9]);
    if (privateUser == 0)
	{
	addPrioritizedImage(probeImageHash, id, priority, geneId, vgPrb_Id);
	addPrioritizedImage(geneImageHash, id, priority, geneId, row[2]);
	addPrioritizedImage(locusLinkImageHash, id, priority, geneId, row[3]);
	addPrioritizedImage(refSeqImageHash, id, priority, geneId, row[4]);
	addPrioritizedImage(genbankImageHash, id, priority, geneId, row[5]);
	}
    }
verbose(2, "Made hashes of image: geneImageHash %d, locusLinkImageHash %d, refSeqImageHash %d"
           ", genbankImageHash %d probeImageHash %d\n", 
            geneImageHash->elCount, locusLinkImageHash->elCount, refSeqImageHash->elCount, 
	    genbankImageHash->elCount, probeImageHash->elCount);
sqlFreeResult(&sr);

/* Build up list of known genes. */
sr = sqlGetResult(hConn, NOSQLINJ "select * from knownGene");
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct genePred *known = genePredLoad(row);
    if (!hashLookup(dupeHash, known->name))
        {
	hashAdd(dupeHash, known->name, NULL);
	slAddHead(&knownList, known);
	}
    }
slReverse(&knownList);
sqlFreeResult(&sr);
verbose(2, "Got %d known genes\n", slCount(knownList));

/* Build up hashes from knownGene to other things. */
if (vgProbes)
    bestProbeOverlap(probesConn, "vgProbes", knownList, knownToProbeHash);
if (vgAllProbes)
    bestProbeOverlap(probesConn, "vgAllProbes", knownList, knownToAllProbeHash);

foldIntoHash(hConn, "knownToLocusLink", "name", "value", knownToLocusLinkHash, NULL, FALSE);
foldIntoHash(hConn, "knownToRefSeq", "name", "value", knownToRefSeqHash, NULL, FALSE);
foldIntoHash(hConn, "kgXref", "kgID", "geneSymbol", knownToGeneHash, favorHugoHash, FALSE);
foldIntoHash(hConn, "kgAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE);
foldIntoHash(hConn, "kgProtAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE);

verbose(2, "knownToLocusLink %d, knownToRefSeq %d, knownToGene %d knownToProbe %d knownToAllProbe %d\n", 
   knownToLocusLinkHash->elCount, knownToRefSeqHash->elCount, knownToGeneHash->elCount,
   knownToProbeHash->elCount, knownToAllProbeHash->elCount);

/* Try and find an image for each gene. */
for (known = knownList; known != NULL; known = known->next)
    {
    char *name = known->name;
    struct prioritizedImage *best = NULL;
    {
    best = bestImage(name, knownToLocusLinkHash, locusLinkImageHash);
    if (!best)
	best = bestImage(name, knownToRefSeqHash, refSeqImageHash);
    if (!best)
	{
	best = hashFindVal(genbankImageHash, name);
	}
    if (!best)
	best = bestImage(name, knownToGeneHash, geneImageHash);
    if (vgProbes && !best)
	best = bestImage(name, knownToProbeHash, probeImageHash);
    if (vgAllProbes && !best)
	best = bestImage(name, knownToAllProbeHash, probeImageHash);
    }	    
    if (best)
        {
	fprintf(f, "%s\t%d\t%d\n", name, best->imageId, best->geneId);
	}
    }

createTable(hConn, outTable);
hgLoadTabFile(hConn, tempDir, outTable, &f);
hgRemoveTabFile(tempDir, outTable);
}
コード例 #17
0
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable)
/* hgExpDistance - Create table that measures expression distance between pairs. */
{
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char query[256];
char **row;
struct hash *expHash = hashNew(16);
int realExpCount = -1;
struct microData *geneList = NULL, *curGene, *gene;
int geneIx, geneCount = 0;
struct microData **geneArray = NULL;
float *weights = NULL;
char *tempDir = ".";
FILE *f = hgCreateTabFile(tempDir, outTable);

/* Get list/hash of all items with expression values. */
sqlSafef(query, sizeof(query), "select name,expCount,expScores from %s", posTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *name = row[0];
    if (!hashLookup(expHash, name))
	{
	int expCount = sqlUnsigned(row[1]);
	int commaCount;
	float *expScores = NULL;

	sqlFloatDynamicArray(row[2], &expScores, &commaCount);
	if (expCount != commaCount)
	    errAbort("expCount and expScores don't match on %s in %s", name, posTable);
	if (realExpCount == -1)
	    realExpCount = expCount;
	if (expCount != realExpCount)
	    errAbort("In %s some rows have %d experiments others %d", 
	    	name, expCount, realExpCount);
	AllocVar(gene);
	gene->expCount = expCount;
	gene->expScores = expScores;
	hashAddSaveName(expHash, name, gene, &gene->name);
	slAddHead(&geneList, gene);
	}
    }
sqlFreeResult(&sr);
conn = sqlConnect(database);
slReverse(&geneList);
geneCount = slCount(geneList);
printf("Have %d elements in %s\n", geneCount, posTable);

weights = getWeights(realExpCount);

if (optionExists("lookup"))
    geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList);
geneCount = slCount(geneList);
printf("Got %d unique elements in %s\n", geneCount, posTable);

sqlDisconnect(&conn);	/* Disconnect because next step is slow. */


if (geneCount < 1)
    errAbort("ERROR: unique gene count less than one ?");
/* Get an array for sorting. */
AllocArray(geneArray, geneCount);
for (gene = geneList,geneIx=0; gene != NULL; gene = gene->next, ++geneIx)
    geneArray[geneIx] = gene;

/* Print out closest 1000 in tab file. */
for (curGene = geneList; curGene != NULL; curGene = curGene->next)
    {
    calcDistances(curGene, geneList, weights);
    qsort(geneArray, geneCount, sizeof(geneArray[0]), cmpMicroDataDistance);
    for (geneIx=0; geneIx < 1000 && geneIx < geneCount; ++geneIx)
        {
	gene = geneArray[geneIx];
	fprintf(f, "%s\t%s\t%f\n", curGene->name, gene->name, gene->distance);
	}
    dotOut();
    }
printf("Made %s.tab\n", outTable);

/* Create and load table. */
conn = sqlConnect(database);
distanceTableCreate(conn, outTable);
hgLoadTabFile(conn, tempDir, outTable, &f);
printf("Loaded %s\n", outTable);

/* Add indices. */
sqlSafef(query, sizeof(query), "alter table %s add index(query)", outTable);
sqlUpdate(conn, query);
printf("Made query index\n");
if (optionExists("targetIndex"))
    {
    sqlSafef(query, sizeof(query), "alter table %s add index(target)", outTable);
    sqlUpdate(conn, query);
    printf("Made target index\n");
    }

hgRemoveTabFile(tempDir, outTable);
}
コード例 #18
0
ファイル: hgFlyBase.c プロジェクト: elmargb/kentUtils
void hgFlyBase(char *database, char *genesFile)
/* hgFlyBase - Parse FlyBase genes.txt file and turn it into a couple of 
 * tables. */
{
char *tGene = "fbGene";
char *tSynonym = "fbSynonym";
char *tAllele = "fbAllele";
char *tRef = "fbRef";
char *tRole = "fbRole";
char *tPhenotype = "fbPhenotype";
char *tTranscript = "fbTranscript";
char *tGo = "fbGo";
char *tUniProt = "fbUniProt";
FILE *fGene = hgCreateTabFile(tabDir, tGene);
FILE *fSynonym = hgCreateTabFile(tabDir, tSynonym);
FILE *fAllele = hgCreateTabFile(tabDir, tAllele);
FILE *fRef = hgCreateTabFile(tabDir, tRef);
FILE *fRole = hgCreateTabFile(tabDir, tRole);
FILE *fPhenotype = hgCreateTabFile(tabDir, tPhenotype);
FILE *fTranscript = NULL;
FILE *fGo = hgCreateTabFile(tabDir, tGo);
FILE *fUniProt = hgCreateTabFile(tabDir, tUniProt);
struct lineFile *lf = lineFileOpen(genesFile, TRUE);
struct hash *refHash = newHash(19);
int nextRefId = 0;
int nextAlleleId = 0;
char *line, sub, type, *rest, *s;
char *geneSym = NULL, *geneName = NULL, *geneId = NULL;
int recordCount = 0;
struct slName *synList = NULL, *syn;
int curAllele = 0, curRef = 0;
struct ref *ref = NULL;
struct sqlConnection *conn;
struct hash *goUniqHash = newHash(18);

/* Make table from flybase genes to BGDP transcripts. */
if (doTranscript)
    {
    fTranscript = hgCreateTabFile(tabDir, tTranscript);
    getAllSplices(database, fTranscript);
    }

/* Make dummy reference for flybase itself. */
fprintf(fRef, "0\tFlyBase\n");

/* Loop through parsing and writing tab files. */
while (lineFileNext(lf, &line, NULL))
    {
    sub = line[0];
    if (sub == '#')
	{
	/* End of record. */
	++recordCount;
	if (geneId == NULL)
	    errAbort("Record without *z line ending line %d of %s",
		lf->lineIx, lf->fileName);

	/* Write out synonyms. */
	s = naForNull(geneSym);
	geneSym = ungreek(s);
	freeMem(s);
	s = naForNull(geneName);
	geneName = ungreek(s);
	if (! sameString(s, "n/a"))
	    freeMem(s);
	if (geneSym != NULL && !sameString(geneSym, "n/a"))
	    slNameStore(&synList, geneSym);
	if (geneName != NULL && !sameString(geneName, "n/a"))
	    slNameStore(&synList, geneName);
	for (syn = synList; syn != NULL; syn = syn->next)
	    {
	    s = ungreek(syn->name);
	    fprintf(fSynonym, "%s\t%s\n", geneId, s);
	    freeMem(s);
	    }

	/* Write out gene record. */
	fprintf(fGene, "%s\t%s\t%s\n", geneId, geneSym, geneName);

	/* Clean up. */
	freez(&geneSym);
	freez(&geneName);
	freez(&geneId);
	slFreeList(&synList);
	ref = NULL;
	curRef = curAllele = 0;
	continue;
	}
    else if (sub == 0)
       errAbort("blank line %d of %s, not allowed in gene.txt",
	    lf->lineIx, lf->fileName);
    else if (isalnum(sub))
       errAbort("line %d of %s begins with %c, not allowed",
	    lf->lineIx, lf->fileName, sub);
    type = line[1];
    rest = trimSpaces(line+2);
    if (sub == '*' && type == 'a')
	geneSym = cloneString(rest);
    else if (sub == '*' && type == 'e')
        geneName = cloneString(rest);
    else if (sub == '*' && type == 'z')
	{
        geneId = cloneString(rest); 
	if (!startsWith("FBgn", geneId))
	    errAbort("Bad FlyBase gene ID %s line %d of %s", geneId, 
		lf->lineIx, lf->fileName);
	}
    else if (type == 'i' && (sub == '*' || sub == '$'))
	{
	if (strlen(rest) > 2)	/* Avoid short useless ones. */
	    slNameStore(&synList, rest);
	}
    else if (sub == '*' && type == 'A')
        {
	if (geneId == NULL)
	    errAbort("Allele before geneId line %d of %s", 
	    	lf->lineIx, lf->fileName);
	curAllele = ++nextAlleleId;
	fprintf(fAllele, "%d\t%s\t%s\n", curAllele, geneId, rest);
	if (!sameString(rest, "classical") &&
	    !sameString(rest, "in vitro") &&
	    !sameString(rest, "wild-type") )
	    {
	    slNameStore(&synList, rest);
	    }
	}
    else if (sub == '*' && type == 'm')
	{
	if (geneId == NULL)
	    errAbort("*m protein ID before geneId line %d of %s", 
	    	lf->lineIx, lf->fileName);
	if (startsWith("UniProt", rest))
	    {
	    char *ptr = strchr(rest, ':');
	    if (ptr != NULL)
		ptr++;
	    else
		errAbort("Trouble parsing UniProt ID %s like %d of %s",
			 rest, lf->lineIx, lf->fileName);
	    fprintf(fUniProt, "%s\t%s\n", geneId, ptr);
	    }
	}
    else if (type == 'E')
        {
	ref = hashFindVal(refHash, rest);
	if (ref == NULL)
	    {
	    AllocVar(ref);
	    ref->id = ++nextRefId;
	    hashAdd(refHash, rest, ref);
	    subChar(rest, '\t', ' ');
	    fprintf(fRef, "%d\t%s\n", ref->id, rest);
	    }
	curRef = ref->id;
	}
    else if ((type == 'k' || type == 'r' || type == 'p') && sub != '@')
        {
	FILE *f = (type == 'r' ? fRole : fPhenotype);
	struct dyString *dy = suckSameLines(lf, line);
	subChar(dy->string, '\t', ' ');
	if (geneId == NULL)
	    errAbort("Expecting *z in record before line %d of %s",
	    	lf->lineIx, lf->fileName);
	fprintf(f, "%s\t%d\t%d\t%s\n", geneId, curAllele, curRef, dy->string);
	dyStringFree(&dy);
	}
    else if (type == 'd' || type == 'f' || type == 'F')
	{
	FILE *f = fGo;
	char aspect = (type == 'd') ? 'P' : (type == 'f') ? 'C' : 'F';
	char *goId = rest;
	char *p = strstr(goId, " ; ");
	char assoc[128];
	if (p == NULL)
	    continue;
	else
	    goId = firstWordInLine(p + 3);
	safef(assoc, sizeof(assoc), "%s.%s", geneId, goId);
	if (hashLookup(goUniqHash, assoc) == NULL)
	    {
	    hashAddInt(goUniqHash, assoc, 1);
	    fprintf(f, "%s\t%s\t%c\n", geneId, goId, aspect);
	    }
	}
    }
printf("Processed %d records in %d lines\n", recordCount, lf->lineIx);
lineFileClose(&lf);

conn = sqlConnect(database);
remakeTables(conn);

if (doLoad)
    {
    printf("Loading %s\n", tGene);
    hgLoadTabFile(conn, tabDir, tGene, &fGene);
    if (doTranscript)
	{
	printf("Loading %s\n", tTranscript);
	hgLoadTabFile(conn, tabDir, tTranscript, &fTranscript);
	}
    printf("Loading %s\n", tSynonym);
    hgLoadTabFile(conn, tabDir, tSynonym, &fSynonym);
    printf("Loading %s\n", tAllele);
    hgLoadTabFile(conn, tabDir, tAllele, &fAllele);
    printf("Loading %s\n", tRef);
    hgLoadTabFile(conn, tabDir, tRef, &fRef);
    printf("Loading %s\n", tRole);
    hgLoadTabFile(conn, tabDir, tRole, &fRole);
    printf("Loading %s\n", tPhenotype);
    hgLoadTabFile(conn, tabDir, tPhenotype, &fPhenotype);
    printf("Loading %s\n", tGo);
    hgLoadTabFile(conn, tabDir, tGo, &fGo);
    printf("Loading %s\n", tUniProt);
    hgLoadTabFile(conn, tabDir, tUniProt, &fUniProt);
    hgRemoveTabFile(tabDir, tGene);
    if (doTranscript)
	hgRemoveTabFile(tabDir, tTranscript);
    hgRemoveTabFile(tabDir, tSynonym);
    hgRemoveTabFile(tabDir, tAllele);
    hgRemoveTabFile(tabDir, tRef);
    hgRemoveTabFile(tabDir, tRole);
    hgRemoveTabFile(tabDir, tPhenotype);
    hgRemoveTabFile(tabDir, tGo);
    hgRemoveTabFile(tabDir, tUniProt);
    }
}