Beispiel #1
0
void mafSummaryTableCreate(struct sqlConnection *conn, char *tableName, int indexSize)
/* Create a mafSummary table with the given name
 */
{
char *createString =
"#Positions and scores for alignment blocks\n"
"CREATE TABLE %s (\n"
"       bin smallint unsigned not null,\n"
"       chrom varchar(255) not null,	# Chromosome\n"
"       chromStart int unsigned not null,	# Start position in chromosome\n"
"       chromEnd int unsigned not null,	# End position in chromosome\n"
"       src varchar(255) not null,	# Sequence name or database of alignment\n"
"       score float not null,	# Floating point score (0.0 to 1.0).\n"
"          #Indices\n"
"       leftStatus char(1),     # Status WRT preceding block\n"
"       rightStatus char(1),    # Status WRT following block\n"
"    INDEX(chrom(%d),bin)\n"
")\n";
struct dyString *dy = newDyString(1024);
sqlDyStringPrintf(dy, createString, tableName, indexSize, indexSize, indexSize);
sqlRemakeTable(conn, tableName, dy->string);
dyStringFree(&dy);
}
static void deleteJoin(struct sqlDeleter* sd, struct sqlConnection *conn,
                       char* table, char* column)
/* delete by creating a new table with a join */
{
char query[512], newTmpTable[64], oldTmpTable[64];
if (sd->accLoader != NULL)
    {
    /* build table, free to indicate it's completed */
    assert(!sd->deletesDone);
    sqlRemakeTable(conn, GB_DELETE_TMP, createGbDeleteTmp);
    sqlUpdaterCommit(sd->accLoader, conn);
    sqlUpdaterFree(&sd->accLoader);
    }
sd->deletesDone = TRUE;

/* remove existing tmp tables */
safef(newTmpTable, sizeof(newTmpTable), "%s_new_tmp", table);
safef(oldTmpTable, sizeof(oldTmpTable), "%s_old_tmp", table);
sqlDropTable(conn, newTmpTable);
sqlDropTable(conn, oldTmpTable);

gbSqlDupTableDef(conn, table, newTmpTable);

/* do join into new table of entries not in accession table */
safef(query, sizeof(query),
      "INSERT INTO %s SELECT %s.* FROM %s LEFT JOIN %s "
      "ON (%s.%s = %s.acc) WHERE %s.acc IS NULL",
      newTmpTable, table, table, GB_DELETE_TMP, table, column,
      GB_DELETE_TMP, GB_DELETE_TMP);
sqlUpdate(conn, query);

/* Now swap the table into place */
safef(query, sizeof(query), "RENAME TABLE %s TO %s, %s TO %s",
      table, oldTmpTable, newTmpTable, table);
sqlUpdate(conn, query);
sqlDropTable(conn, oldTmpTable);
}
Beispiel #3
0
void gtexGeneBedCreateTable(struct sqlConnection *conn, char *table)
/* Create expression record format table of given name. */
{
char query[1024];

sqlSafef(query, sizeof(query),
"CREATE TABLE %s (\n"
"   chrom varchar(255) not null,	# Reference sequence chromosome or scaffold\n"
"   chromStart int unsigned not null,	# Start position in chromosome\n"
"   chromEnd int unsigned not null,	# End position in chromosome\n"
"   name varchar(255) not null,	# Gene symbol\n"
"   score int unsigned not null,	# Score from 0-1000\n"
"   strand char(1) not null,	# + or - for strand\n"
"   geneId varchar(255) not null,	# Ensembl gene ID, referenced in GTEx data tables\n"
"   transcriptId varchar(255) not null,	# Ensembl ID of Canonical transcript; determines genomic position\n"
"   transcriptClass varchar(255) not null,	# GENCODE transcript class (coding, nonCoding, pseudo)\n"
"   expCount int unsigned not null,	# Number of experiment values\n"
"   expScores longblob not null,	# Comma separated list of experiment scores\n"
          "#Indices\n"
"   PRIMARY KEY(geneId)\n"
")\n",
    table);
sqlRemakeTable(conn, table, query);
}
struct hash *loadMotifWeights(struct sqlConnection *conn, char *fileName, 
	char *table)
/* Load in XML weight motif file and save it in tab-separated format
 * and in hash keyed by motif name. */
{
struct esmMotifs *motifs = esmMotifsLoad(fileName);
struct esmMotif *motif;
FILE *f = hgCreateTabFile(tmpDir, table);
struct dyString *dy = dyStringNew(512);
struct hash *hash = newHash(16);

for (motif = motifs->esmMotif; motif != NULL; motif = motif->next)
    {
    struct esmWeights *weights = motif->esmWeights;
    int posCount = slCount(weights->esmPosition);
    struct esmPosition *pos;
    struct dnaMotif *dm;
    char name[64];


    fixMotifName(motif->Name, name, sizeof(name));
    AllocVar(dm);
    dm->name = cloneString(name);
    dm->columnCount = posCount;
    AllocArray(dm->aProb, posCount);
    AllocArray(dm->cProb, posCount);
    AllocArray(dm->gProb, posCount);
    AllocArray(dm->tProb, posCount);
    for (pos = weights->esmPosition; pos != NULL; pos = pos->next)
        {
	char *row[5];
	double odds[4], sumOdds = 0;
	int i;

	int ix = pos->Num;
	int rowSize = chopString(pos->Weights, ";", row, ArraySize(row));
	if (rowSize != 4)
	    errAbort("Expecting 4 values for weights in position %d of Motif %s",
               pos->Num, motif->Name);
	if (ix >= posCount)
	    errAbort("Num %d out of range in Motif %s", ix, motif->Name);
	for (i=0; i<4; ++i)
	    {
	    odds[i] = exp(atof(row[0]));
	    sumOdds += odds[i];
	    }
	dm->aProb[ix] = odds[0]/sumOdds;
	dm->cProb[ix] = odds[1]/sumOdds;
	dm->gProb[ix] = odds[2]/sumOdds;
	dm->tProb[ix] = odds[3]/sumOdds;
	}
    dnaMotifTabOut(dm, f);
    hashAdd(hash, dm->name, dm);
    }
dyStringPrintf(dy,
"CREATE TABLE %s (\n"
"    name varchar(16) not null,	# Motif name.\n"
"    columnCount int not null,	# Count of columns in motif.\n"
"    aProb longblob not null,	# Probability of A's in each column.\n"
"    cProb longblob not null,	# Probability of C's in each column.\n"
"    gProb longblob not null,	# Probability of G's in each column.\n"
"    tProb longblob not null,	# Probability of T's in each column.\n"
"              #Indices\n"
"    PRIMARY KEY(name)\n"
")\n", table);
sqlRemakeTable(conn, table, dy->string);
hgLoadTabFile(conn, tmpDir, table, &f);
hgRemoveTabFile(tmpDir, table);
verbose(1, "Processed %d motifs into %s\n", slCount(motifs->esmMotif), table);
return hash;
}
void loadGeneToMotif(struct sqlConnection *conn, char *fileName, char *table,
	struct hash *geneToModuleHash, struct hash *moduleAndMotifHash,
	struct hash *motifHash, struct hash *positionsHash,
	char *regionTable)
/* Load file which is a big matrix with genes for rows and motifs for
 * columns.  There is a semicolon-separated list of numbers in the matrix 
 * where a gene has the motif, and an empty (tab separated) field
 * where there is no motif.  The numbers are relative to the
 * region associated with the gene in the positionsHash. 
 * Only load bits of this where motif actually occurs in module associated 
 * with gene. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
FILE *f = hgCreateTabFile(tmpDir, table);
char *motifNames[32*1024], *row[32*1024];
int motifCount, rowSize, i;
char *gene, *module;
int geneCount = 0, total = 0;
struct dyString *dy = dyStringNew(512);
struct genomePos *motifPosList = NULL, *motifPosForGene;
struct genomePos *regionPosList = NULL, *regionPos;

/* Read first line, which is labels. */
if (!lineFileNextReal(lf, &line))
    errAbort("Empty file %s", fileName);
subChar(line, ' ', '_');
motifCount = chopLine(line, motifNames);
if (motifCount >= ArraySize(motifNames))
    errAbort("Too many motifs line 1 of %s", fileName);
lineFileExpectAtLeast(lf, 2, motifCount);
motifNames[0] = NULL;
for (i=1; i<motifCount; ++i)
    {
    char name[64];
    motifNames[i] = cloneString(fixMotifName(motifNames[i],name,sizeof(name)));
    if (!hashLookup(motifHash, motifNames[i]))
        errAbort("Motif %s is in %s but not modules_motifs.gxm",
		motifNames[i], fileName);
    }

/* Read subsequent lines. */
while ((rowSize = lineFileChopTab(lf, row)) != 0)
    {
    lineFileExpectWords(lf, motifCount, rowSize);
    gene = row[0];
    module = hashFindVal(geneToModuleHash, gene);
    if (module == NULL)
	{
        warn("WARNING: Gene %s in line %d of %s but not module_assignments.tab", 
		gene, lf->lineIx, lf->fileName);
	continue;
	}
    regionPos = NULL;
    for (i=1; i<rowSize; ++i)
        {
	if (row[i][0] != 0)
	    {
	    if (hashLookup2(moduleAndMotifHash, module, motifNames[i]))
		{
		regionPos = hashFindVal(positionsHash, gene);
		if (regionPos == NULL)
		    {
		    warn("WARNING: %s in %s but not gene_positions.tab",
		    	gene, fileName);
		    i = rowSize; continue;
		    }
		
		motifPosForGene = convertMotifPos(row[i], regionPos, 
			hashMustFindVal(motifHash, motifNames[i]), lf);
		motifPosList = slCat(motifPosForGene, motifPosList);
		++total;
		}
	    }
	}
    if (regionPos != NULL)
        {
	slAddHead(&regionPosList, regionPos);
	}
    ++geneCount;
    }
lineFileClose(&lf);

/* Output sorted table of all motif hits. */
    {
    struct genomePos *pos;
    slSort(&motifPosList, genomePosCmp);
    for (pos = motifPosList; pos != NULL; pos = pos->next)
	{
	int start = pos->start;
	int end = pos->end;
	if (start < 0) start = 0;
	fprintf(f, "%d\t", binFromRange(start, end));
	fprintf(f, "%s\t", pos->chrom);
	fprintf(f, "%d\t%d\t", start, end);
	fprintf(f, "%s\t", pos->motif);
	fprintf(f, "%d\t", pos->score);
	fprintf(f, "%c\t", pos->strand);
	fprintf(f, "%s\n", pos->name);
	}
    dyStringPrintf(dy,
    "CREATE TABLE  %s (\n"
    "    bin smallInt unsigned not null,\n"
    "    chrom varChar(255) not null,\n"
    "    chromStart int not null,\n"
    "    chromEnd int not null,\n"
    "    name varchar(255) not null,\n"
    "    score int not null,\n"
    "    strand char(1) not null,\n"
    "    gene varchar(255) not null,\n"
    "              #Indices\n"
    "    INDEX(gene(12)),\n"
    "    INDEX(name(16)),\n"
    "    INDEX(chrom(8),bin)\n"
    ")\n",  table);
    sqlRemakeTable(conn, table, dy->string);
    verbose(1, "%d genes, %d motifs, %d motifs in genes\n",
	    geneCount, motifCount-1, total);
    hgLoadTabFile(conn, tmpDir, table, &f);
    // hgRemoveTabFile(tmpDir, table);
    verbose(1, "Loaded %s table\n", table);
    slFreeList(&motifPosList);
    }

/* Now output sorted table of upstream regions. */
    {
    FILE *f = hgCreateTabFile(tmpDir, regionTable);
    struct genomePos *pos;
    dyStringClear(dy);
    dyStringPrintf(dy,
    "CREATE TABLE  %s (\n"
    "    bin smallInt unsigned not null,\n"
    "    chrom varChar(255) not null,\n"
    "    chromStart int not null,\n"
    "    chromEnd int not null,\n"
    "    name varchar(255) not null,\n"
    "    score int not null,\n"
    "    strand char(1) not null,\n"
    "              #Indices\n"
    "    INDEX(name(16)),\n"
    "    INDEX(chrom(8),bin)\n"
    ")\n",  regionTable);
    sqlRemakeTable(conn, regionTable, dy->string);
    slSort(&regionPosList, genomePosCmp);
    for (pos = regionPosList; pos != NULL; pos = pos->next)
	{
	int start = pos->start;
	int end = pos->end;
	if (start < 0) start = 0;
	fprintf(f, "%d\t", binFromRange(start, end));
	fprintf(f, "%s\t", pos->chrom);
	fprintf(f, "%d\t%d\t", start, end);
	fprintf(f, "%s\t", pos->name);
	fprintf(f, "%d\t", pos->score);
	fprintf(f, "%c\n", pos->strand);
	}
    hgLoadTabFile(conn, tmpDir, regionTable, &f);
    // hgRemoveTabFile(tmpDir, regionTable);
    }
}
void loadDatabase(char *database, char *tab, char *track)
/* Load database from tab file. */
{
    struct sqlConnection *conn = sqlConnect(database);
    struct dyString *dy = newDyString(1024);
    /* First make table definition. */
    if (sqlTable != NULL)
    {
        /* Read from file. */
        char *sql, *s;
        readInGulp(sqlTable, &sql, NULL);

        /* Chop of end-of-statement semicolon if need be. */
        s = strchr(sql, ';');
        if (s != NULL) *s = 0;

        sqlRemakeTable(conn, track, sql);
        freez(&sql);
    }
    else if (!oldTable)
    {
        /* Create definition statement. */
        verbose(1, "Creating table definition for %s\n", track);
        sqlDyStringPrintf(dy, "CREATE TABLE %s (\n", track);
        if (!noBin)
            dyStringAppend(dy, "  bin smallint unsigned not null,\n");
        dyStringAppend(dy, "  level int unsigned not null,\n");
        dyStringAppend(dy, "  tName varchar(255) not null,\n");
        dyStringAppend(dy, "  tStart int unsigned not null,\n");
        dyStringAppend(dy, "  tEnd int unsigned not null,\n");
        dyStringAppend(dy, "  strand char(1) not null,\n");
        dyStringAppend(dy, "  qName varchar(255) not null,\n");
        dyStringAppend(dy, "  qStart int unsigned not null,\n");
        dyStringAppend(dy, "  qEnd int unsigned not null,\n");
        dyStringAppend(dy, "  chainId int unsigned not null,\n");
        dyStringAppend(dy, "  ali int unsigned not null,\n");
        dyStringAppend(dy, "  score double not null,\n");
        dyStringAppend(dy, "  qOver int not null, \n");
        dyStringAppend(dy, "  qFar int not null, \n");
        dyStringAppend(dy, "  qDup int not null, \n");
        dyStringAppend(dy, "  type varchar(255) not null,\n");
        dyStringAppend(dy, "  tN int not null, \n");
        dyStringAppend(dy, "  qN int not null, \n");
        dyStringAppend(dy, "  tR int not null, \n");
        dyStringAppend(dy, "  qR int not null, \n");
        dyStringAppend(dy, "  tNewR int not null, \n");
        dyStringAppend(dy, "  qNewR int not null, \n");
        dyStringAppend(dy, "  tOldR int not null, \n");
        dyStringAppend(dy, "  qOldR int not null, \n");
        dyStringAppend(dy, "  tTrf int not null, \n");
        dyStringAppend(dy, "  qTrf int not null, \n");
        dyStringAppend(dy, "#Indices\n");
        if (!noBin)
            dyStringAppend(dy, "  INDEX(tName(16),bin),\n");
        dyStringAppend(dy, "  INDEX(tName(16),tStart)\n");
        dyStringAppend(dy, ")\n");
        sqlRemakeTable(conn, track, dy->string);
    }

    dyStringClear(dy);
    sqlDyStringPrintf(dy, "load data local infile '%s' into table %s", tab, track);
    verbose(1, "Loading %s into %s\n", track, database);
    sqlUpdate(conn, dy->string);
    /* add a comment to the history table and finish up connection */
    hgHistoryComment(conn, "Loaded net table %s", track);
    sqlDisconnect(&conn);
}
static void loadDatabase(char *database, char *track, int bedSize, struct bedStub *bedList)
/* Load database from bedList. */
{
struct sqlConnection *conn;
struct dyString *dy = newDyString(1024);
char *tab = (char *)NULL;
int loadOptions = (optionExists("onServer") ? SQL_TAB_FILE_ON_SERVER : 0);

if ( ! noLoad )
    conn = sqlConnect(database);

if ((char *)NULL != tmpDir)
    tab = cloneString(rTempName(tmpDir,"loadBed",".tab"));
else
    tab = cloneString("bed.tab");

if (bedDetail && sqlTable == NULL) 
    errAbort("bedDetail format requires sqlTable option");
if (bedDetail && !strictTab) 
    errAbort("bedDetail format must be tab separated");
if (bedDetail && !noBin) 
    noBin = TRUE;

/* First make table definition. */
if (sqlTable != NULL && !oldTable)
    {
    /* Read from file. */
    char *sql, *s;
    readInGulp(sqlTable, &sql, NULL);
    /* Chop off end-of-statement semicolon if need be. */
    s = strchr(sql, ';');
    if (s != NULL) *s = 0;
    
    if ( !noLoad )
        {
        if (renameSqlTable)
            {
            char *pos = stringIn("CREATE TABLE ", sql);
            if (pos == NULL)
                errAbort("Can't find CREATE TABLE in %s\n", sqlTable);
            char *oldSql = cloneString(sql);
            nextWord(&pos); nextWord(&pos);
            char *tableName = nextWord(&pos);
            sql = replaceChars(oldSql, tableName, track);
            }
        verbose(1, "Creating table definition for %s\n", track);
        sqlRemakeTable(conn, track, sql);
        if (!noBin) 
	    addBinToEmptyTable(conn, track);
	adjustSqlTableColumns(conn, track, bedSize);
	}
    
    freez(&sql);
    }
else if (!oldTable)
    {
    int minLength;

    if (noLoad)
	minLength=6;
    else if (maxChromNameLength)
	minLength = maxChromNameLength;
    else
	minLength = hGetMinIndexLength(database);
    verbose(2, "INDEX chrom length: %d\n", minLength);

    /* Create definition statement. */
    verbose(1, "Creating table definition for %s\n", track);
    dyStringPrintf(dy, "CREATE TABLE %s (\n", track);
    if (!noBin)
       dyStringAppend(dy, "  bin smallint unsigned not null,\n");
    dyStringAppend(dy, "  chrom varchar(255) not null,\n");
    dyStringAppend(dy, "  chromStart int unsigned not null,\n");
    dyStringAppend(dy, "  chromEnd int unsigned not null,\n");
    if (bedSize >= 4)
       maybeBedGraph(4, dy, "  name varchar(255) not null,\n");
    if (bedSize >= 5)
	{
	if (allowNegativeScores)
	    maybeBedGraph(5, dy, "  score int not null,\n");
	else
	    maybeBedGraph(5, dy, "  score int unsigned not null,\n");
	}
    if (bedSize >= 6)
       maybeBedGraph(6, dy, "  strand char(1) not null,\n");
    if (bedSize >= 7)
       maybeBedGraph(7, dy, "  thickStart int unsigned not null,\n");
    if (bedSize >= 8)
       maybeBedGraph(8, dy, "  thickEnd int unsigned not null,\n");
    /*	As of 2004-11-22 the reserved field is used as itemRgb in code */
    if (bedSize >= 9)
       maybeBedGraph(9, dy, "  reserved int unsigned  not null,\n");
    if (bedSize >= 10)
       maybeBedGraph(10, dy, "  blockCount int unsigned not null,\n");
    if (bedSize >= 11)
       maybeBedGraph(11, dy, "  blockSizes longblob not null,\n");
    if (bedSize >= 12)
       maybeBedGraph(12, dy, "  chromStarts longblob not null,\n");
    if (bedSize >= 13)
       maybeBedGraph(13, dy, "  expCount int unsigned not null,\n");
    if (bedSize >= 14)
       maybeBedGraph(14, dy, "  expIds longblob not null,\n");
    if (bedSize >= 15)
       maybeBedGraph(15, dy, "  expScores longblob not null,\n");
    dyStringAppend(dy, "#Indices\n");
    if (nameIx && (bedSize >= 4) && (0 == bedGraph))
       dyStringAppend(dy, "  INDEX(name(16)),\n");
    if (noBin)
	{
	dyStringPrintf(dy, "  INDEX(chrom(%d),chromStart)\n", minLength);
	}
    else
	{
        dyStringPrintf(dy, "  INDEX(chrom(%d),bin)\n", minLength);
	}
    dyStringAppend(dy, ")\n");
    if (noLoad)
	verbose(2,"%s", dy->string);
    else
	sqlRemakeTable(conn, track, dy->string);
    }

verbose(1, "Saving %s\n", tab);
writeBedTab(tab, bedList, bedSize);

if ( ! noLoad )
    {
    verbose(1, "Loading %s\n", database);
    if (customTrackLoader)
	sqlLoadTabFile(conn, tab, track, loadOptions|SQL_TAB_FILE_WARN_ON_WARN);
    else
	sqlLoadTabFile(conn, tab, track, loadOptions);

    if (! noHistory)
	{
	char comment[256];
	/* add a comment to the history table and finish up connection */
	safef(comment, sizeof(comment),
	    "Add %d element(s) from bed list to %s table",
		slCount(bedList), track);
	hgHistoryComment(conn, comment);
	}
    if(fillInScoreColumn != NULL)
        {
        char query[500];
        char buf[500];
        struct sqlResult *sr;
        safef(query, sizeof(query), "select sum(score) from %s", track);
        if(sqlQuickQuery(conn, query, buf, sizeof(buf)))
            {
            unsigned sum = sqlUnsigned(buf);
            if (!sum)
                {
                safef(query, sizeof(query), "select min(%s), max(%s) from %s", fillInScoreColumn, fillInScoreColumn, track);
                if ((sr = sqlGetResult(conn, query)) != NULL)
                    {
                    char **row = sqlNextRow(sr);
                    if(row != NULL)
                        {
                        float min = sqlFloat(row[0]);
                        float max = sqlFloat(row[1]);
			if ( !(max == -1 && min == -1)) // if score is -1 then ignore, as if it werent present
			    {
			    if (max == min || sameString(row[0],row[1])) // this will lead to 'inf' score value in SQL update causing an error
				errAbort("Could not set score in table %s max(%s)=min(%s)=%s\n", track, fillInScoreColumn, fillInScoreColumn, row[0]);
                            sqlFreeResult(&sr);

			    // Calculate a, b s/t f(x) = ax + b maps min-max => minScore-1000
			    float a = (1000-minScore) / (max - min);
			    float b = 1000 - ((1000-minScore) * max) / (max - min);

			    safef(query, sizeof(query), "update %s set score = round((%f * %s) + %f)",  track, a, fillInScoreColumn, b);
			    int changed = sqlUpdateRows(conn, query, NULL);
			    verbose(2, "update query: %s; changed: %d\n", query, changed);
			    }
			else
			    {
                            sqlFreeResult(&sr);
			    verbose(2, "score not updated; all values for column %s are -1\n", fillInScoreColumn);
			    }
			}
                    }
                }
            }

        }
    sqlDisconnect(&conn);
    /*	if temp dir specified, unlink file to make it disappear */
    if ((char *)NULL != tmpDir)
	unlink(tab);
    }
else
    verbose(1, "No load option selected, see file: %s\n", tab);

}	/*	static void loadDatabase()	*/
Beispiel #8
0
void hgTrackDb(char *org, char *database, char *trackDbName, char *sqlFile, char *hgRoot,
               boolean strict)
/* hgTrackDb - Create trackDb table from text files. */
{
struct trackDb *td;
char tab[PATH_LEN];
safef(tab, sizeof(tab), "%s.tab", trackDbName);

struct trackDb *tdbList = buildTrackDb(org, database, hgRoot, strict);
tdbList = flatten(tdbList);
slSort(&tdbList, trackDbCmp);
verbose(1, "Loaded %d track descriptions total\n", slCount(tdbList));

/* Write to tab-separated file; hold off on html, since it must be encoded */
    {
    verbose(2, "Starting write of tabs to %s\n", tab);
    FILE *f = mustOpen(tab, "w");
    for (td = tdbList; td != NULL; td = td->next)
        {
        hVarSubstTrackDb(td, database);
        char *hold = td->html;
        td->html = "";
	subChar(td->type, '\t', ' ');	/* Tabs confuse things. */
	subChar(td->shortLabel, '\t', ' ');	/* Tabs confuse things. */
	subChar(td->longLabel, '\t', ' ');	/* Tabs confuse things. */
	trackDbTabOut(td, f);
        td->html = hold;
        }
    carefulClose(&f);
    verbose(2, "Wrote tab representation to %s\n", tab);
    }

/* Update database */
    {
    char *create, *end;
    char query[256];
    struct sqlConnection *conn = sqlConnect(database);

    /* Load in table definition. */
    readInGulp(sqlFile, &create, NULL);
    create = trimSpaces(create);
    create = substituteTrackName(create, trackDbName);
    end = create + strlen(create)-1;
    if (*end == ';') *end = 0;
    sqlRemakeTable(conn, trackDbName, create);

    /* Load in regular fields. */
    sqlSafef(query, sizeof(query), "load data local infile '%s' into table %s", tab, trackDbName);
    verbose(2, "sending mysql \"%s\"\n", query);
    sqlUpdate(conn, query);
    verbose(2, "done tab file load");

    /* Load in html and settings fields. */
    for (td = tdbList; td != NULL; td = td->next)
	{
        if (isEmpty(td->html))
	    {
	    if (strict && !trackDbLocalSetting(td, "parent") && !trackDbLocalSetting(td, "superTrack") &&
	        !sameString(td->track,"cytoBandIdeo"))
		{
		fprintf(stderr, "Warning: html missing for %s %s %s '%s'\n",org, database, td->track, td->shortLabel);
		}
	    }
	else
	    {
	    updateBigTextField(conn,  trackDbName, "tableName", td->track, "html", td->html);
	    }
	if (td->settingsHash != NULL)
	    {
	    char *settings = settingsFromHash(td->settingsHash);
	    updateBigTextField(conn, trackDbName, "tableName", td->track,
	        "settings", settings);
	    if (showSettings)
		{
		verbose(1, "%s: type='%s';", td->track, td->type);
		if (isNotEmpty(settings))
		    {
		    char *oneLine = replaceChars(settings, "\n", "; ");
		    eraseTrailingSpaces(oneLine);
		    verbose(1, " %s", oneLine);
		    freeMem(oneLine);
		    }
		verbose(1, "\n");
		}
	    freeMem(settings);
	    }
	}

    sqlDisconnect(&conn);
    verbose(1, "Loaded database %s\n", database);
    }
}
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph. */
{
    double minVal,maxVal;
    struct chromGraph *el, *list;
    FILE *f;
    char *tempDir = ".";
    char path[PATH_LEN], gbdbPath[PATH_LEN];
    char *idTable = optionVal("idTable", NULL);
    char *pathPrefix = NULL;

    if (idTable == NULL)
        list = chromGraphLoadAll(fileName);
    else
        list = chromGraphListWithTable(fileName, db, idTable);
    if (list == NULL)
        errAbort("%s is empty", fileName);

    /* Figure out min/max values */
    minVal = maxVal = list->val;
    for (el = list->next; el != NULL; el = el->next)
    {
        if (optionExists("minusLog10"))
        {
            if (el->val == 1)
                el->val = 0;
            else if (el->val > 0)
                el->val = -1 * log(el->val)/log(10);
        }
        if (el->val < minVal)
            minVal = el->val;
        if (el->val > maxVal)
            maxVal = el->val;
    }


    /* Sort and write out temp file. */
    slSort(&list, chromGraphCmp);
    f = hgCreateTabFile(tempDir, track);
    for (el = list; el != NULL; el = el->next)
        chromGraphTabOut(el, f);

    if (doLoad)
    {
        struct dyString *dy = dyStringNew(0);
        struct sqlConnection *conn;

        /* Set up connection to database and create main table. */
        conn = hAllocConn(db);
        sqlDyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
        sqlRemakeTable(conn, track, dy->string);

        /* Load main table and clean up file handle. */
        hgLoadTabFile(conn, tempDir, track, &f);
        hgRemoveTabFile(tempDir, track);

        /* If need be create meta table.  If need be delete old row. */
        if (!sqlTableExists(conn, "metaChromGraph"))
            sqlUpdate(conn, metaCreateString);
        else
        {
            dyStringClear(dy);
            sqlDyStringPrintf(dy, "delete from metaChromGraph where name = '%s'",
                              track);
            sqlUpdate(conn, dy->string);
        }

        /* Make chrom graph file */
        safef(path, sizeof(path), "%s.cgb", track);
        chromGraphToBin(list, path);
        safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
        pathPrefix = optionVal("pathPrefix", path);
        safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

        /* Create new line in meta table */
        dyStringClear(dy);
        sqlDyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
                          track, minVal, maxVal, gbdbPath);
        sqlUpdate(conn, dy->string);
    }
}
void hgFindSpec(char *org, char *database, char *hgFindSpecName, char *sqlFile,
                char *hgRoot, boolean strict)
/* hgFindSpec - Create hgFindSpec table from text files. */
{
    struct hash *uniqHash = newHash(8);
    struct hash *htmlHash = newHash(8);
    struct hgFindSpec *hfsList = NULL, *hfs;
    char rootDir[512], orgDir[512], asmDir[512];
    char tab[512];
    snprintf(tab, sizeof(tab), "%s.tab", hgFindSpecName);

    /* Create track list from hgRoot and hgRoot/org and hgRoot/org/assembly
     * ra format database. */
    sprintf(rootDir, "%s", hgRoot);
    sprintf(orgDir, "%s/%s", hgRoot, org);
    sprintf(asmDir, "%s/%s/%s", hgRoot, org, database);
    layerOn(strict, database, asmDir, uniqHash, htmlHash, FALSE, &hfsList);
    layerOn(strict, database, orgDir, uniqHash, htmlHash, FALSE, &hfsList);
    layerOn(strict, database, rootDir, uniqHash, htmlHash, TRUE, &hfsList);
    slSort(&hfsList, hgFindSpecCmp);
    if (verboseLevel() > 0)
        printf("Loaded %d search specs total\n", slCount(hfsList));

    /* Write to tab-separated file. */
    {
        FILE *f = mustOpen(tab, "w");
        for (hfs = hfsList; hfs != NULL; hfs = hfs->next)
            hgFindSpecTabOut(hfs, f);
        carefulClose(&f);
    }

    /* Update database */
    {
        char *create, *end;
        char query[256];
        struct sqlConnection *conn = sqlConnect(database);

        /* Load in table definition. */
        readInGulp(sqlFile, &create, NULL);
        create = trimSpaces(create);
        create = subTrackName(create, hgFindSpecName);
        end = create + strlen(create)-1;
        if (*end == ';') *end = 0;
        sqlRemakeTable(conn, hgFindSpecName, create);

        /* Load in regular fields. */
        sqlSafef(query, sizeof query, "load data local infile '%s' into table %s", tab,
                 hgFindSpecName);
        sqlUpdate(conn, query);

        /* Load in settings fields. */
        for (hfs = hfsList; hfs != NULL; hfs = hfs->next)
        {
            if (hfs->settingsHash != NULL)
            {
                char *settings = settingsFromHash(hfs->settingsHash);
                updateBigTextField(conn, hgFindSpecName, "searchName",
                                   hfs->searchName,
                                   "searchSettings", settings);
                freeMem(settings);
            }
        }

        sqlDisconnect(&conn);
        if (verboseLevel() > 0)
            printf("Loaded database %s\n", database);
    }
}
Beispiel #11
0
void remakeTables(struct sqlConnection *conn)
/* Remake all our tables. */
{
sqlRemakeTable(conn, "fbGene", 
"#Links FlyBase IDs, gene symbols and gene names\n"
"NOSQLINJ CREATE TABLE fbGene (\n"
"    geneId varchar(255) not null,	# FlyBase ID\n"
"    geneSym varchar(255) not null,	# Short gene symbol\n"
"    geneName varchar(255) not null,	# Gene name - up to a couple of words\n"
"              #Indices\n"
"    PRIMARY KEY(geneId(11)),\n"
"    INDEX(geneSym(8)),\n"
"    INDEX(geneName(12))\n"
")\n");

sqlRemakeTable(conn, "fbTranscript", 
"#Links FlyBase gene IDs and BDGP transcripts\n"
"NOSQLINJ CREATE TABLE fbTranscript (\n"
"    geneId varchar(255) not null,	# FlyBase ID\n"
"    transcriptId varchar(255) not null,	# BDGP Transcript ID\n"
"              #Indices\n"
"    PRIMARY KEY(transcriptId(11)),\n"
"    INDEX(transcriptId(11))\n"
")\n");

sqlRemakeTable(conn, "fbSynonym", 
"#Links all the names we call a gene to it's flybase ID\n"
"NOSQLINJ CREATE TABLE fbSynonym (\n"
"    geneId varchar(255) not null,	# FlyBase ID\n"
"    name varchar(255) not null,	# A name (synonym or real\n"
"              #Indices\n"
"    INDEX(geneId(11)),\n"
"    INDEX(name(12))\n"
")\n");

sqlRemakeTable(conn, "fbAllele", 
"#The alleles of a gene\n"
"NOSQLINJ CREATE TABLE fbAllele (\n"
"    id int not null,	# Allele ID\n"
"    geneId varchar(255) not null,	# Flybase ID of gene\n"
"    name varchar(255) not null,	# Allele name\n"
"              #Indices\n"
"    PRIMARY KEY(id),\n"
"    INDEX(geneId(11))\n"
")\n");

sqlRemakeTable(conn, "fbRef", 
"#A literature or sometimes database reference\n"
"NOSQLINJ CREATE TABLE fbRef (\n"
"    id int not null,	# Reference ID\n"
"    text longblob not null,	# Usually begins with flybase ref ID, but not always\n"
"              #Indices\n"
"    PRIMARY KEY(id)\n"
")\n");

sqlRemakeTable(conn, "fbRole", 
"#Role of gene in wildType\n"
"NOSQLINJ CREATE TABLE fbRole (\n"
"    geneId varchar(255) not null,	# Flybase Gene ID\n"
"    fbAllele int not null,	# ID in fbAllele table or 0 if not allele-specific\n"
"    fbRef int not null,	# ID in fbRef table\n"
"    text longblob not null,	# Descriptive text\n"
"              #Indices\n"
"    INDEX(geneId(11))\n"
")\n");

sqlRemakeTable(conn, "fbPhenotype", 
"#Observed phenotype in mutant.  Sometimes contains gene function info\n"
"NOSQLINJ CREATE TABLE fbPhenotype (\n"
"    geneId varchar(255) not null,	# Flybase Gene ID\n"
"    fbAllele int not null,	# ID in fbAllele table or 0 if not allele-specific\n"
"    fbRef int not null,	# ID in fbRef table\n"
"    text longblob not null,	# Descriptive text\n"
"              #Indices\n"
"    INDEX(geneId(11))\n"
")\n");

sqlRemakeTable(conn, "fbGo", 
"#Links FlyBase gene IDs and GO IDs/aspects\n"
"NOSQLINJ CREATE TABLE fbGo (\n"
"    geneId varchar(255) not null,	# FlyBase ID\n"
"    goId varchar(255) not null,	# GO ID\n"
"    aspect varchar(255) not null,      # P (process), F (function) or C (cellular component)"
"              #Indices\n"
"    INDEX(geneId(11)),\n"
"    INDEX(goId(10))\n"
")\n");

sqlRemakeTable(conn, "fbUniProt", 
"#Links FlyBase gene IDs and UniProt IDs/aspects\n"
"NOSQLINJ CREATE TABLE fbUniProt (\n"
"    geneId varchar(255) not null,	# FlyBase ID\n"
"    uniProtId varchar(255) not null,	# UniProt ID\n"
"              #Indices\n"
"    INDEX(geneId(11)),\n"
"    INDEX(uniProtId(6))\n"
")\n");
}
void makeGoldAndGap(struct sqlConnection *conn, char *chromDir)
/* Read in .agp files in chromDir and use them to create the
 * gold and gap tables for the corresponding chromosome(s). */
{
    struct dyString *ds = newDyString(2048);
    struct fileInfo *fiList, *fi;
    char dir[256], chrom[128], ext[64];
    char goldName[128], gapName[128];
    char *agpName;
    char *ptr;
    char goldFileName[128];
    char gapFileName[128];

    if (! noLoad)
    {
        safef(goldFileName, ArraySize(goldFileName), "%s", goldTabName);
        safef(gapFileName, ArraySize(gapFileName), "%s", gapTabName);
    }
    fiList = listDirX(chromDir, "*.agp", TRUE);
    for (fi = fiList; fi != NULL; fi = fi->next)
    {

        /* Get full path name of .agp file and process it
         * into table names. */
        agpName = fi->name;
        printf("Processing %s\n", agpName);
        splitPath(agpName, dir, chrom, ext);
        while ((ptr = strchr(chrom, '.')) != NULL)
            *ptr = '_';
        sprintf(goldName, "%s_gold", chrom);
        sprintf(gapName, "%s_gap", chrom);

        if (noLoad)
        {
            safef(goldFileName, ArraySize(goldFileName), "%s_gold.tab", chrom);
            safef(gapFileName, ArraySize(gapFileName), "%s_gap.tab", chrom);
        }

        /* Create gold & gap tab separated files. */
        splitAgp(fi->name, goldFileName, gapFileName);

        /* Create gold table and load it up. */
        dyStringClear(ds);
        dyStringPrintf(ds, createGold, goldName);
        dyStringPrintf(ds, goldSplitIndex, maxFragNameSize);
        verbose(2, "%s", ds->string);
        if (! noLoad)
            sqlRemakeTable(conn, goldName, ds->string);
        dyStringClear(ds);
        dyStringPrintf(ds, "LOAD data local infile '%s' into table %s",
                       goldFileName, goldName);
        if (! noLoad)
        {
            sqlUpdate(conn, ds->string);
            remove(goldFileName);
        }

        /* Create gap table and load it up. */
        dyStringClear(ds);
        dyStringPrintf(ds, createGap, gapName);
        dyStringAppend(ds, gapSplitIndex);
        verbose(2, "%s", ds->string);
        if (! noLoad)
        {
            sqlRemakeTable(conn, gapName, ds->string);
            sqlMaybeMakeTable(conn, gapName, ds->string);
        }
        dyStringClear(ds);
        dyStringPrintf(ds, "LOAD data local infile '%s' into table %s",
                       gapFileName, gapName);
        if (! noLoad)
        {
            sqlUpdate(conn, ds->string);
            remove(gapFileName);
        }
    }
    freeDyString(&ds);
}