void mafSummaryTableCreate(struct sqlConnection *conn, char *tableName, int indexSize) /* Create a mafSummary table with the given name */ { char *createString = "#Positions and scores for alignment blocks\n" "CREATE TABLE %s (\n" " bin smallint unsigned not null,\n" " chrom varchar(255) not null, # Chromosome\n" " chromStart int unsigned not null, # Start position in chromosome\n" " chromEnd int unsigned not null, # End position in chromosome\n" " src varchar(255) not null, # Sequence name or database of alignment\n" " score float not null, # Floating point score (0.0 to 1.0).\n" " #Indices\n" " leftStatus char(1), # Status WRT preceding block\n" " rightStatus char(1), # Status WRT following block\n" " INDEX(chrom(%d),bin)\n" ")\n"; struct dyString *dy = newDyString(1024); sqlDyStringPrintf(dy, createString, tableName, indexSize, indexSize, indexSize); sqlRemakeTable(conn, tableName, dy->string); dyStringFree(&dy); }
static void deleteJoin(struct sqlDeleter* sd, struct sqlConnection *conn, char* table, char* column) /* delete by creating a new table with a join */ { char query[512], newTmpTable[64], oldTmpTable[64]; if (sd->accLoader != NULL) { /* build table, free to indicate it's completed */ assert(!sd->deletesDone); sqlRemakeTable(conn, GB_DELETE_TMP, createGbDeleteTmp); sqlUpdaterCommit(sd->accLoader, conn); sqlUpdaterFree(&sd->accLoader); } sd->deletesDone = TRUE; /* remove existing tmp tables */ safef(newTmpTable, sizeof(newTmpTable), "%s_new_tmp", table); safef(oldTmpTable, sizeof(oldTmpTable), "%s_old_tmp", table); sqlDropTable(conn, newTmpTable); sqlDropTable(conn, oldTmpTable); gbSqlDupTableDef(conn, table, newTmpTable); /* do join into new table of entries not in accession table */ safef(query, sizeof(query), "INSERT INTO %s SELECT %s.* FROM %s LEFT JOIN %s " "ON (%s.%s = %s.acc) WHERE %s.acc IS NULL", newTmpTable, table, table, GB_DELETE_TMP, table, column, GB_DELETE_TMP, GB_DELETE_TMP); sqlUpdate(conn, query); /* Now swap the table into place */ safef(query, sizeof(query), "RENAME TABLE %s TO %s, %s TO %s", table, oldTmpTable, newTmpTable, table); sqlUpdate(conn, query); sqlDropTable(conn, oldTmpTable); }
void gtexGeneBedCreateTable(struct sqlConnection *conn, char *table) /* Create expression record format table of given name. */ { char query[1024]; sqlSafef(query, sizeof(query), "CREATE TABLE %s (\n" " chrom varchar(255) not null, # Reference sequence chromosome or scaffold\n" " chromStart int unsigned not null, # Start position in chromosome\n" " chromEnd int unsigned not null, # End position in chromosome\n" " name varchar(255) not null, # Gene symbol\n" " score int unsigned not null, # Score from 0-1000\n" " strand char(1) not null, # + or - for strand\n" " geneId varchar(255) not null, # Ensembl gene ID, referenced in GTEx data tables\n" " transcriptId varchar(255) not null, # Ensembl ID of Canonical transcript; determines genomic position\n" " transcriptClass varchar(255) not null, # GENCODE transcript class (coding, nonCoding, pseudo)\n" " expCount int unsigned not null, # Number of experiment values\n" " expScores longblob not null, # Comma separated list of experiment scores\n" "#Indices\n" " PRIMARY KEY(geneId)\n" ")\n", table); sqlRemakeTable(conn, table, query); }
struct hash *loadMotifWeights(struct sqlConnection *conn, char *fileName, char *table) /* Load in XML weight motif file and save it in tab-separated format * and in hash keyed by motif name. */ { struct esmMotifs *motifs = esmMotifsLoad(fileName); struct esmMotif *motif; FILE *f = hgCreateTabFile(tmpDir, table); struct dyString *dy = dyStringNew(512); struct hash *hash = newHash(16); for (motif = motifs->esmMotif; motif != NULL; motif = motif->next) { struct esmWeights *weights = motif->esmWeights; int posCount = slCount(weights->esmPosition); struct esmPosition *pos; struct dnaMotif *dm; char name[64]; fixMotifName(motif->Name, name, sizeof(name)); AllocVar(dm); dm->name = cloneString(name); dm->columnCount = posCount; AllocArray(dm->aProb, posCount); AllocArray(dm->cProb, posCount); AllocArray(dm->gProb, posCount); AllocArray(dm->tProb, posCount); for (pos = weights->esmPosition; pos != NULL; pos = pos->next) { char *row[5]; double odds[4], sumOdds = 0; int i; int ix = pos->Num; int rowSize = chopString(pos->Weights, ";", row, ArraySize(row)); if (rowSize != 4) errAbort("Expecting 4 values for weights in position %d of Motif %s", pos->Num, motif->Name); if (ix >= posCount) errAbort("Num %d out of range in Motif %s", ix, motif->Name); for (i=0; i<4; ++i) { odds[i] = exp(atof(row[0])); sumOdds += odds[i]; } dm->aProb[ix] = odds[0]/sumOdds; dm->cProb[ix] = odds[1]/sumOdds; dm->gProb[ix] = odds[2]/sumOdds; dm->tProb[ix] = odds[3]/sumOdds; } dnaMotifTabOut(dm, f); hashAdd(hash, dm->name, dm); } dyStringPrintf(dy, "CREATE TABLE %s (\n" " name varchar(16) not null, # Motif name.\n" " columnCount int not null, # Count of columns in motif.\n" " aProb longblob not null, # Probability of A's in each column.\n" " cProb longblob not null, # Probability of C's in each column.\n" " gProb longblob not null, # Probability of G's in each column.\n" " tProb longblob not null, # Probability of T's in each column.\n" " #Indices\n" " PRIMARY KEY(name)\n" ")\n", table); sqlRemakeTable(conn, table, dy->string); hgLoadTabFile(conn, tmpDir, table, &f); hgRemoveTabFile(tmpDir, table); verbose(1, "Processed %d motifs into %s\n", slCount(motifs->esmMotif), table); return hash; }
void loadGeneToMotif(struct sqlConnection *conn, char *fileName, char *table, struct hash *geneToModuleHash, struct hash *moduleAndMotifHash, struct hash *motifHash, struct hash *positionsHash, char *regionTable) /* Load file which is a big matrix with genes for rows and motifs for * columns. There is a semicolon-separated list of numbers in the matrix * where a gene has the motif, and an empty (tab separated) field * where there is no motif. The numbers are relative to the * region associated with the gene in the positionsHash. * Only load bits of this where motif actually occurs in module associated * with gene. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; FILE *f = hgCreateTabFile(tmpDir, table); char *motifNames[32*1024], *row[32*1024]; int motifCount, rowSize, i; char *gene, *module; int geneCount = 0, total = 0; struct dyString *dy = dyStringNew(512); struct genomePos *motifPosList = NULL, *motifPosForGene; struct genomePos *regionPosList = NULL, *regionPos; /* Read first line, which is labels. */ if (!lineFileNextReal(lf, &line)) errAbort("Empty file %s", fileName); subChar(line, ' ', '_'); motifCount = chopLine(line, motifNames); if (motifCount >= ArraySize(motifNames)) errAbort("Too many motifs line 1 of %s", fileName); lineFileExpectAtLeast(lf, 2, motifCount); motifNames[0] = NULL; for (i=1; i<motifCount; ++i) { char name[64]; motifNames[i] = cloneString(fixMotifName(motifNames[i],name,sizeof(name))); if (!hashLookup(motifHash, motifNames[i])) errAbort("Motif %s is in %s but not modules_motifs.gxm", motifNames[i], fileName); } /* Read subsequent lines. */ while ((rowSize = lineFileChopTab(lf, row)) != 0) { lineFileExpectWords(lf, motifCount, rowSize); gene = row[0]; module = hashFindVal(geneToModuleHash, gene); if (module == NULL) { warn("WARNING: Gene %s in line %d of %s but not module_assignments.tab", gene, lf->lineIx, lf->fileName); continue; } regionPos = NULL; for (i=1; i<rowSize; ++i) { if (row[i][0] != 0) { if (hashLookup2(moduleAndMotifHash, module, motifNames[i])) { regionPos = hashFindVal(positionsHash, gene); if (regionPos == NULL) { warn("WARNING: %s in %s but not gene_positions.tab", gene, fileName); i = rowSize; continue; } motifPosForGene = convertMotifPos(row[i], regionPos, hashMustFindVal(motifHash, motifNames[i]), lf); motifPosList = slCat(motifPosForGene, motifPosList); ++total; } } } if (regionPos != NULL) { slAddHead(®ionPosList, regionPos); } ++geneCount; } lineFileClose(&lf); /* Output sorted table of all motif hits. */ { struct genomePos *pos; slSort(&motifPosList, genomePosCmp); for (pos = motifPosList; pos != NULL; pos = pos->next) { int start = pos->start; int end = pos->end; if (start < 0) start = 0; fprintf(f, "%d\t", binFromRange(start, end)); fprintf(f, "%s\t", pos->chrom); fprintf(f, "%d\t%d\t", start, end); fprintf(f, "%s\t", pos->motif); fprintf(f, "%d\t", pos->score); fprintf(f, "%c\t", pos->strand); fprintf(f, "%s\n", pos->name); } dyStringPrintf(dy, "CREATE TABLE %s (\n" " bin smallInt unsigned not null,\n" " chrom varChar(255) not null,\n" " chromStart int not null,\n" " chromEnd int not null,\n" " name varchar(255) not null,\n" " score int not null,\n" " strand char(1) not null,\n" " gene varchar(255) not null,\n" " #Indices\n" " INDEX(gene(12)),\n" " INDEX(name(16)),\n" " INDEX(chrom(8),bin)\n" ")\n", table); sqlRemakeTable(conn, table, dy->string); verbose(1, "%d genes, %d motifs, %d motifs in genes\n", geneCount, motifCount-1, total); hgLoadTabFile(conn, tmpDir, table, &f); // hgRemoveTabFile(tmpDir, table); verbose(1, "Loaded %s table\n", table); slFreeList(&motifPosList); } /* Now output sorted table of upstream regions. */ { FILE *f = hgCreateTabFile(tmpDir, regionTable); struct genomePos *pos; dyStringClear(dy); dyStringPrintf(dy, "CREATE TABLE %s (\n" " bin smallInt unsigned not null,\n" " chrom varChar(255) not null,\n" " chromStart int not null,\n" " chromEnd int not null,\n" " name varchar(255) not null,\n" " score int not null,\n" " strand char(1) not null,\n" " #Indices\n" " INDEX(name(16)),\n" " INDEX(chrom(8),bin)\n" ")\n", regionTable); sqlRemakeTable(conn, regionTable, dy->string); slSort(®ionPosList, genomePosCmp); for (pos = regionPosList; pos != NULL; pos = pos->next) { int start = pos->start; int end = pos->end; if (start < 0) start = 0; fprintf(f, "%d\t", binFromRange(start, end)); fprintf(f, "%s\t", pos->chrom); fprintf(f, "%d\t%d\t", start, end); fprintf(f, "%s\t", pos->name); fprintf(f, "%d\t", pos->score); fprintf(f, "%c\n", pos->strand); } hgLoadTabFile(conn, tmpDir, regionTable, &f); // hgRemoveTabFile(tmpDir, regionTable); } }
void loadDatabase(char *database, char *tab, char *track) /* Load database from tab file. */ { struct sqlConnection *conn = sqlConnect(database); struct dyString *dy = newDyString(1024); /* First make table definition. */ if (sqlTable != NULL) { /* Read from file. */ char *sql, *s; readInGulp(sqlTable, &sql, NULL); /* Chop of end-of-statement semicolon if need be. */ s = strchr(sql, ';'); if (s != NULL) *s = 0; sqlRemakeTable(conn, track, sql); freez(&sql); } else if (!oldTable) { /* Create definition statement. */ verbose(1, "Creating table definition for %s\n", track); sqlDyStringPrintf(dy, "CREATE TABLE %s (\n", track); if (!noBin) dyStringAppend(dy, " bin smallint unsigned not null,\n"); dyStringAppend(dy, " level int unsigned not null,\n"); dyStringAppend(dy, " tName varchar(255) not null,\n"); dyStringAppend(dy, " tStart int unsigned not null,\n"); dyStringAppend(dy, " tEnd int unsigned not null,\n"); dyStringAppend(dy, " strand char(1) not null,\n"); dyStringAppend(dy, " qName varchar(255) not null,\n"); dyStringAppend(dy, " qStart int unsigned not null,\n"); dyStringAppend(dy, " qEnd int unsigned not null,\n"); dyStringAppend(dy, " chainId int unsigned not null,\n"); dyStringAppend(dy, " ali int unsigned not null,\n"); dyStringAppend(dy, " score double not null,\n"); dyStringAppend(dy, " qOver int not null, \n"); dyStringAppend(dy, " qFar int not null, \n"); dyStringAppend(dy, " qDup int not null, \n"); dyStringAppend(dy, " type varchar(255) not null,\n"); dyStringAppend(dy, " tN int not null, \n"); dyStringAppend(dy, " qN int not null, \n"); dyStringAppend(dy, " tR int not null, \n"); dyStringAppend(dy, " qR int not null, \n"); dyStringAppend(dy, " tNewR int not null, \n"); dyStringAppend(dy, " qNewR int not null, \n"); dyStringAppend(dy, " tOldR int not null, \n"); dyStringAppend(dy, " qOldR int not null, \n"); dyStringAppend(dy, " tTrf int not null, \n"); dyStringAppend(dy, " qTrf int not null, \n"); dyStringAppend(dy, "#Indices\n"); if (!noBin) dyStringAppend(dy, " INDEX(tName(16),bin),\n"); dyStringAppend(dy, " INDEX(tName(16),tStart)\n"); dyStringAppend(dy, ")\n"); sqlRemakeTable(conn, track, dy->string); } dyStringClear(dy); sqlDyStringPrintf(dy, "load data local infile '%s' into table %s", tab, track); verbose(1, "Loading %s into %s\n", track, database); sqlUpdate(conn, dy->string); /* add a comment to the history table and finish up connection */ hgHistoryComment(conn, "Loaded net table %s", track); sqlDisconnect(&conn); }
static void loadDatabase(char *database, char *track, int bedSize, struct bedStub *bedList) /* Load database from bedList. */ { struct sqlConnection *conn; struct dyString *dy = newDyString(1024); char *tab = (char *)NULL; int loadOptions = (optionExists("onServer") ? SQL_TAB_FILE_ON_SERVER : 0); if ( ! noLoad ) conn = sqlConnect(database); if ((char *)NULL != tmpDir) tab = cloneString(rTempName(tmpDir,"loadBed",".tab")); else tab = cloneString("bed.tab"); if (bedDetail && sqlTable == NULL) errAbort("bedDetail format requires sqlTable option"); if (bedDetail && !strictTab) errAbort("bedDetail format must be tab separated"); if (bedDetail && !noBin) noBin = TRUE; /* First make table definition. */ if (sqlTable != NULL && !oldTable) { /* Read from file. */ char *sql, *s; readInGulp(sqlTable, &sql, NULL); /* Chop off end-of-statement semicolon if need be. */ s = strchr(sql, ';'); if (s != NULL) *s = 0; if ( !noLoad ) { if (renameSqlTable) { char *pos = stringIn("CREATE TABLE ", sql); if (pos == NULL) errAbort("Can't find CREATE TABLE in %s\n", sqlTable); char *oldSql = cloneString(sql); nextWord(&pos); nextWord(&pos); char *tableName = nextWord(&pos); sql = replaceChars(oldSql, tableName, track); } verbose(1, "Creating table definition for %s\n", track); sqlRemakeTable(conn, track, sql); if (!noBin) addBinToEmptyTable(conn, track); adjustSqlTableColumns(conn, track, bedSize); } freez(&sql); } else if (!oldTable) { int minLength; if (noLoad) minLength=6; else if (maxChromNameLength) minLength = maxChromNameLength; else minLength = hGetMinIndexLength(database); verbose(2, "INDEX chrom length: %d\n", minLength); /* Create definition statement. */ verbose(1, "Creating table definition for %s\n", track); dyStringPrintf(dy, "CREATE TABLE %s (\n", track); if (!noBin) dyStringAppend(dy, " bin smallint unsigned not null,\n"); dyStringAppend(dy, " chrom varchar(255) not null,\n"); dyStringAppend(dy, " chromStart int unsigned not null,\n"); dyStringAppend(dy, " chromEnd int unsigned not null,\n"); if (bedSize >= 4) maybeBedGraph(4, dy, " name varchar(255) not null,\n"); if (bedSize >= 5) { if (allowNegativeScores) maybeBedGraph(5, dy, " score int not null,\n"); else maybeBedGraph(5, dy, " score int unsigned not null,\n"); } if (bedSize >= 6) maybeBedGraph(6, dy, " strand char(1) not null,\n"); if (bedSize >= 7) maybeBedGraph(7, dy, " thickStart int unsigned not null,\n"); if (bedSize >= 8) maybeBedGraph(8, dy, " thickEnd int unsigned not null,\n"); /* As of 2004-11-22 the reserved field is used as itemRgb in code */ if (bedSize >= 9) maybeBedGraph(9, dy, " reserved int unsigned not null,\n"); if (bedSize >= 10) maybeBedGraph(10, dy, " blockCount int unsigned not null,\n"); if (bedSize >= 11) maybeBedGraph(11, dy, " blockSizes longblob not null,\n"); if (bedSize >= 12) maybeBedGraph(12, dy, " chromStarts longblob not null,\n"); if (bedSize >= 13) maybeBedGraph(13, dy, " expCount int unsigned not null,\n"); if (bedSize >= 14) maybeBedGraph(14, dy, " expIds longblob not null,\n"); if (bedSize >= 15) maybeBedGraph(15, dy, " expScores longblob not null,\n"); dyStringAppend(dy, "#Indices\n"); if (nameIx && (bedSize >= 4) && (0 == bedGraph)) dyStringAppend(dy, " INDEX(name(16)),\n"); if (noBin) { dyStringPrintf(dy, " INDEX(chrom(%d),chromStart)\n", minLength); } else { dyStringPrintf(dy, " INDEX(chrom(%d),bin)\n", minLength); } dyStringAppend(dy, ")\n"); if (noLoad) verbose(2,"%s", dy->string); else sqlRemakeTable(conn, track, dy->string); } verbose(1, "Saving %s\n", tab); writeBedTab(tab, bedList, bedSize); if ( ! noLoad ) { verbose(1, "Loading %s\n", database); if (customTrackLoader) sqlLoadTabFile(conn, tab, track, loadOptions|SQL_TAB_FILE_WARN_ON_WARN); else sqlLoadTabFile(conn, tab, track, loadOptions); if (! noHistory) { char comment[256]; /* add a comment to the history table and finish up connection */ safef(comment, sizeof(comment), "Add %d element(s) from bed list to %s table", slCount(bedList), track); hgHistoryComment(conn, comment); } if(fillInScoreColumn != NULL) { char query[500]; char buf[500]; struct sqlResult *sr; safef(query, sizeof(query), "select sum(score) from %s", track); if(sqlQuickQuery(conn, query, buf, sizeof(buf))) { unsigned sum = sqlUnsigned(buf); if (!sum) { safef(query, sizeof(query), "select min(%s), max(%s) from %s", fillInScoreColumn, fillInScoreColumn, track); if ((sr = sqlGetResult(conn, query)) != NULL) { char **row = sqlNextRow(sr); if(row != NULL) { float min = sqlFloat(row[0]); float max = sqlFloat(row[1]); if ( !(max == -1 && min == -1)) // if score is -1 then ignore, as if it werent present { if (max == min || sameString(row[0],row[1])) // this will lead to 'inf' score value in SQL update causing an error errAbort("Could not set score in table %s max(%s)=min(%s)=%s\n", track, fillInScoreColumn, fillInScoreColumn, row[0]); sqlFreeResult(&sr); // Calculate a, b s/t f(x) = ax + b maps min-max => minScore-1000 float a = (1000-minScore) / (max - min); float b = 1000 - ((1000-minScore) * max) / (max - min); safef(query, sizeof(query), "update %s set score = round((%f * %s) + %f)", track, a, fillInScoreColumn, b); int changed = sqlUpdateRows(conn, query, NULL); verbose(2, "update query: %s; changed: %d\n", query, changed); } else { sqlFreeResult(&sr); verbose(2, "score not updated; all values for column %s are -1\n", fillInScoreColumn); } } } } } } sqlDisconnect(&conn); /* if temp dir specified, unlink file to make it disappear */ if ((char *)NULL != tmpDir) unlink(tab); } else verbose(1, "No load option selected, see file: %s\n", tab); } /* static void loadDatabase() */
void hgTrackDb(char *org, char *database, char *trackDbName, char *sqlFile, char *hgRoot, boolean strict) /* hgTrackDb - Create trackDb table from text files. */ { struct trackDb *td; char tab[PATH_LEN]; safef(tab, sizeof(tab), "%s.tab", trackDbName); struct trackDb *tdbList = buildTrackDb(org, database, hgRoot, strict); tdbList = flatten(tdbList); slSort(&tdbList, trackDbCmp); verbose(1, "Loaded %d track descriptions total\n", slCount(tdbList)); /* Write to tab-separated file; hold off on html, since it must be encoded */ { verbose(2, "Starting write of tabs to %s\n", tab); FILE *f = mustOpen(tab, "w"); for (td = tdbList; td != NULL; td = td->next) { hVarSubstTrackDb(td, database); char *hold = td->html; td->html = ""; subChar(td->type, '\t', ' '); /* Tabs confuse things. */ subChar(td->shortLabel, '\t', ' '); /* Tabs confuse things. */ subChar(td->longLabel, '\t', ' '); /* Tabs confuse things. */ trackDbTabOut(td, f); td->html = hold; } carefulClose(&f); verbose(2, "Wrote tab representation to %s\n", tab); } /* Update database */ { char *create, *end; char query[256]; struct sqlConnection *conn = sqlConnect(database); /* Load in table definition. */ readInGulp(sqlFile, &create, NULL); create = trimSpaces(create); create = substituteTrackName(create, trackDbName); end = create + strlen(create)-1; if (*end == ';') *end = 0; sqlRemakeTable(conn, trackDbName, create); /* Load in regular fields. */ sqlSafef(query, sizeof(query), "load data local infile '%s' into table %s", tab, trackDbName); verbose(2, "sending mysql \"%s\"\n", query); sqlUpdate(conn, query); verbose(2, "done tab file load"); /* Load in html and settings fields. */ for (td = tdbList; td != NULL; td = td->next) { if (isEmpty(td->html)) { if (strict && !trackDbLocalSetting(td, "parent") && !trackDbLocalSetting(td, "superTrack") && !sameString(td->track,"cytoBandIdeo")) { fprintf(stderr, "Warning: html missing for %s %s %s '%s'\n",org, database, td->track, td->shortLabel); } } else { updateBigTextField(conn, trackDbName, "tableName", td->track, "html", td->html); } if (td->settingsHash != NULL) { char *settings = settingsFromHash(td->settingsHash); updateBigTextField(conn, trackDbName, "tableName", td->track, "settings", settings); if (showSettings) { verbose(1, "%s: type='%s';", td->track, td->type); if (isNotEmpty(settings)) { char *oneLine = replaceChars(settings, "\n", "; "); eraseTrailingSpaces(oneLine); verbose(1, " %s", oneLine); freeMem(oneLine); } verbose(1, "\n"); } freeMem(settings); } } sqlDisconnect(&conn); verbose(1, "Loaded database %s\n", database); } }
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName) /* hgLoadChromGraph - Load up chromosome graph. */ { double minVal,maxVal; struct chromGraph *el, *list; FILE *f; char *tempDir = "."; char path[PATH_LEN], gbdbPath[PATH_LEN]; char *idTable = optionVal("idTable", NULL); char *pathPrefix = NULL; if (idTable == NULL) list = chromGraphLoadAll(fileName); else list = chromGraphListWithTable(fileName, db, idTable); if (list == NULL) errAbort("%s is empty", fileName); /* Figure out min/max values */ minVal = maxVal = list->val; for (el = list->next; el != NULL; el = el->next) { if (optionExists("minusLog10")) { if (el->val == 1) el->val = 0; else if (el->val > 0) el->val = -1 * log(el->val)/log(10); } if (el->val < minVal) minVal = el->val; if (el->val > maxVal) maxVal = el->val; } /* Sort and write out temp file. */ slSort(&list, chromGraphCmp); f = hgCreateTabFile(tempDir, track); for (el = list; el != NULL; el = el->next) chromGraphTabOut(el, f); if (doLoad) { struct dyString *dy = dyStringNew(0); struct sqlConnection *conn; /* Set up connection to database and create main table. */ conn = hAllocConn(db); sqlDyStringPrintf(dy, createString, track, hGetMinIndexLength(db)); sqlRemakeTable(conn, track, dy->string); /* Load main table and clean up file handle. */ hgLoadTabFile(conn, tempDir, track, &f); hgRemoveTabFile(tempDir, track); /* If need be create meta table. If need be delete old row. */ if (!sqlTableExists(conn, "metaChromGraph")) sqlUpdate(conn, metaCreateString); else { dyStringClear(dy); sqlDyStringPrintf(dy, "delete from metaChromGraph where name = '%s'", track); sqlUpdate(conn, dy->string); } /* Make chrom graph file */ safef(path, sizeof(path), "%s.cgb", track); chromGraphToBin(list, path); safef(path, sizeof(path), "/gbdb/%s/chromGraph", db); pathPrefix = optionVal("pathPrefix", path); safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track); /* Create new line in meta table */ dyStringClear(dy); sqlDyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');", track, minVal, maxVal, gbdbPath); sqlUpdate(conn, dy->string); } }
void hgFindSpec(char *org, char *database, char *hgFindSpecName, char *sqlFile, char *hgRoot, boolean strict) /* hgFindSpec - Create hgFindSpec table from text files. */ { struct hash *uniqHash = newHash(8); struct hash *htmlHash = newHash(8); struct hgFindSpec *hfsList = NULL, *hfs; char rootDir[512], orgDir[512], asmDir[512]; char tab[512]; snprintf(tab, sizeof(tab), "%s.tab", hgFindSpecName); /* Create track list from hgRoot and hgRoot/org and hgRoot/org/assembly * ra format database. */ sprintf(rootDir, "%s", hgRoot); sprintf(orgDir, "%s/%s", hgRoot, org); sprintf(asmDir, "%s/%s/%s", hgRoot, org, database); layerOn(strict, database, asmDir, uniqHash, htmlHash, FALSE, &hfsList); layerOn(strict, database, orgDir, uniqHash, htmlHash, FALSE, &hfsList); layerOn(strict, database, rootDir, uniqHash, htmlHash, TRUE, &hfsList); slSort(&hfsList, hgFindSpecCmp); if (verboseLevel() > 0) printf("Loaded %d search specs total\n", slCount(hfsList)); /* Write to tab-separated file. */ { FILE *f = mustOpen(tab, "w"); for (hfs = hfsList; hfs != NULL; hfs = hfs->next) hgFindSpecTabOut(hfs, f); carefulClose(&f); } /* Update database */ { char *create, *end; char query[256]; struct sqlConnection *conn = sqlConnect(database); /* Load in table definition. */ readInGulp(sqlFile, &create, NULL); create = trimSpaces(create); create = subTrackName(create, hgFindSpecName); end = create + strlen(create)-1; if (*end == ';') *end = 0; sqlRemakeTable(conn, hgFindSpecName, create); /* Load in regular fields. */ sqlSafef(query, sizeof query, "load data local infile '%s' into table %s", tab, hgFindSpecName); sqlUpdate(conn, query); /* Load in settings fields. */ for (hfs = hfsList; hfs != NULL; hfs = hfs->next) { if (hfs->settingsHash != NULL) { char *settings = settingsFromHash(hfs->settingsHash); updateBigTextField(conn, hgFindSpecName, "searchName", hfs->searchName, "searchSettings", settings); freeMem(settings); } } sqlDisconnect(&conn); if (verboseLevel() > 0) printf("Loaded database %s\n", database); } }
void remakeTables(struct sqlConnection *conn) /* Remake all our tables. */ { sqlRemakeTable(conn, "fbGene", "#Links FlyBase IDs, gene symbols and gene names\n" "NOSQLINJ CREATE TABLE fbGene (\n" " geneId varchar(255) not null, # FlyBase ID\n" " geneSym varchar(255) not null, # Short gene symbol\n" " geneName varchar(255) not null, # Gene name - up to a couple of words\n" " #Indices\n" " PRIMARY KEY(geneId(11)),\n" " INDEX(geneSym(8)),\n" " INDEX(geneName(12))\n" ")\n"); sqlRemakeTable(conn, "fbTranscript", "#Links FlyBase gene IDs and BDGP transcripts\n" "NOSQLINJ CREATE TABLE fbTranscript (\n" " geneId varchar(255) not null, # FlyBase ID\n" " transcriptId varchar(255) not null, # BDGP Transcript ID\n" " #Indices\n" " PRIMARY KEY(transcriptId(11)),\n" " INDEX(transcriptId(11))\n" ")\n"); sqlRemakeTable(conn, "fbSynonym", "#Links all the names we call a gene to it's flybase ID\n" "NOSQLINJ CREATE TABLE fbSynonym (\n" " geneId varchar(255) not null, # FlyBase ID\n" " name varchar(255) not null, # A name (synonym or real\n" " #Indices\n" " INDEX(geneId(11)),\n" " INDEX(name(12))\n" ")\n"); sqlRemakeTable(conn, "fbAllele", "#The alleles of a gene\n" "NOSQLINJ CREATE TABLE fbAllele (\n" " id int not null, # Allele ID\n" " geneId varchar(255) not null, # Flybase ID of gene\n" " name varchar(255) not null, # Allele name\n" " #Indices\n" " PRIMARY KEY(id),\n" " INDEX(geneId(11))\n" ")\n"); sqlRemakeTable(conn, "fbRef", "#A literature or sometimes database reference\n" "NOSQLINJ CREATE TABLE fbRef (\n" " id int not null, # Reference ID\n" " text longblob not null, # Usually begins with flybase ref ID, but not always\n" " #Indices\n" " PRIMARY KEY(id)\n" ")\n"); sqlRemakeTable(conn, "fbRole", "#Role of gene in wildType\n" "NOSQLINJ CREATE TABLE fbRole (\n" " geneId varchar(255) not null, # Flybase Gene ID\n" " fbAllele int not null, # ID in fbAllele table or 0 if not allele-specific\n" " fbRef int not null, # ID in fbRef table\n" " text longblob not null, # Descriptive text\n" " #Indices\n" " INDEX(geneId(11))\n" ")\n"); sqlRemakeTable(conn, "fbPhenotype", "#Observed phenotype in mutant. Sometimes contains gene function info\n" "NOSQLINJ CREATE TABLE fbPhenotype (\n" " geneId varchar(255) not null, # Flybase Gene ID\n" " fbAllele int not null, # ID in fbAllele table or 0 if not allele-specific\n" " fbRef int not null, # ID in fbRef table\n" " text longblob not null, # Descriptive text\n" " #Indices\n" " INDEX(geneId(11))\n" ")\n"); sqlRemakeTable(conn, "fbGo", "#Links FlyBase gene IDs and GO IDs/aspects\n" "NOSQLINJ CREATE TABLE fbGo (\n" " geneId varchar(255) not null, # FlyBase ID\n" " goId varchar(255) not null, # GO ID\n" " aspect varchar(255) not null, # P (process), F (function) or C (cellular component)" " #Indices\n" " INDEX(geneId(11)),\n" " INDEX(goId(10))\n" ")\n"); sqlRemakeTable(conn, "fbUniProt", "#Links FlyBase gene IDs and UniProt IDs/aspects\n" "NOSQLINJ CREATE TABLE fbUniProt (\n" " geneId varchar(255) not null, # FlyBase ID\n" " uniProtId varchar(255) not null, # UniProt ID\n" " #Indices\n" " INDEX(geneId(11)),\n" " INDEX(uniProtId(6))\n" ")\n"); }
void makeGoldAndGap(struct sqlConnection *conn, char *chromDir) /* Read in .agp files in chromDir and use them to create the * gold and gap tables for the corresponding chromosome(s). */ { struct dyString *ds = newDyString(2048); struct fileInfo *fiList, *fi; char dir[256], chrom[128], ext[64]; char goldName[128], gapName[128]; char *agpName; char *ptr; char goldFileName[128]; char gapFileName[128]; if (! noLoad) { safef(goldFileName, ArraySize(goldFileName), "%s", goldTabName); safef(gapFileName, ArraySize(gapFileName), "%s", gapTabName); } fiList = listDirX(chromDir, "*.agp", TRUE); for (fi = fiList; fi != NULL; fi = fi->next) { /* Get full path name of .agp file and process it * into table names. */ agpName = fi->name; printf("Processing %s\n", agpName); splitPath(agpName, dir, chrom, ext); while ((ptr = strchr(chrom, '.')) != NULL) *ptr = '_'; sprintf(goldName, "%s_gold", chrom); sprintf(gapName, "%s_gap", chrom); if (noLoad) { safef(goldFileName, ArraySize(goldFileName), "%s_gold.tab", chrom); safef(gapFileName, ArraySize(gapFileName), "%s_gap.tab", chrom); } /* Create gold & gap tab separated files. */ splitAgp(fi->name, goldFileName, gapFileName); /* Create gold table and load it up. */ dyStringClear(ds); dyStringPrintf(ds, createGold, goldName); dyStringPrintf(ds, goldSplitIndex, maxFragNameSize); verbose(2, "%s", ds->string); if (! noLoad) sqlRemakeTable(conn, goldName, ds->string); dyStringClear(ds); dyStringPrintf(ds, "LOAD data local infile '%s' into table %s", goldFileName, goldName); if (! noLoad) { sqlUpdate(conn, ds->string); remove(goldFileName); } /* Create gap table and load it up. */ dyStringClear(ds); dyStringPrintf(ds, createGap, gapName); dyStringAppend(ds, gapSplitIndex); verbose(2, "%s", ds->string); if (! noLoad) { sqlRemakeTable(conn, gapName, ds->string); sqlMaybeMakeTable(conn, gapName, ds->string); } dyStringClear(ds); dyStringPrintf(ds, "LOAD data local infile '%s' into table %s", gapFileName, gapName); if (! noLoad) { sqlUpdate(conn, ds->string); remove(gapFileName); } } freeDyString(&ds); }