void loadOneTable(char *database, struct sqlConnection *conn, char *tempName, char *tableName) /* Load .tab file tempName into tableName and remove tempName. */ { struct dyString *query = newDyString(1024); verbose(1, "Loading up table %s\n", tableName); if (sqlTableExists(conn, tableName)) { sqlDyStringPrintf(query, "DROP table %s", tableName); sqlUpdate(conn, query->string); } /* Create first part of table definitions, the fields. */ dyStringClear(query); sqlDyStringPrintf(query, createRmskOut, tableName); /* Create the indexes */ int indexLen = hGetMinIndexLength(database); sqlDyStringPrintf(query, " INDEX(genoName(%d),bin))\n", indexLen); sqlUpdate(conn, query->string); /* Load database from tab-file. */ dyStringClear(query); sqlDyStringPrintf(query, "LOAD data local infile '%s' into table %s", tempName, tableName); sqlUpdate(conn, query->string); remove(tempName); }
void setupTable(char *db, struct sqlConnection *conn, char *table)
/* (Re)create table as a genePred table.  The gGenePredExt and gBin globals
 * select the extended field set and the bin column respectively; the index
 * length comes from hGetMinIndexLength(db). */
{
unsigned fldOpts = 0;
unsigned sqlOpts = 0;
char *createSql;

if (gBin)
    sqlOpts = genePredWithBin;
if (gGenePredExt)
    fldOpts = genePredAllFlds;

createSql = genePredGetCreateSql(table, fldOpts, sqlOpts, hGetMinIndexLength(db));
sqlRemakeTable(conn, table, createSql);
freez(&createSql);
}
static void createGeneTbl(struct gbGeneTbl *ggt, struct sqlConnection* conn)
/* (Re)create the genePred table named by ggt->tbl.  ggt->hasExtCols and
 * ggt->hasBin decide whether the extended columns and the bin column are
 * part of the definition. */
{
unsigned fldOpts = 0;
unsigned sqlOpts = 0;
char *createSql;

if (ggt->hasExtCols)
    fldOpts = genePredAllFlds;
if (ggt->hasBin)
    sqlOpts = genePredWithBin;

createSql = genePredGetCreateSql(ggt->tbl, fldOpts, sqlOpts,
                                 hGetMinIndexLength(sqlGetDatabase(conn)));
sqlRemakeTable(conn, ggt->tbl, createSql);
freeMem(createSql);
}
void ldGencodeIntron(char *database, char *table, int gtfCount, char *gtfNames[]) /* Load Gencode intron status table from GTF files with * intron_id and intron_status keywords */ { struct gffFile *gff, *gffList = NULL; struct gffLine *gffLine; struct gencodeIntron *intron, *intronList = NULL; struct sqlConnection *conn; FILE *f; int i; int introns = 0; for (i=0; i<gtfCount; i++) { verbose(1, "Reading %s\n", gtfNames[i]); gff = gffRead(gtfNames[i]); for (gffLine = gff->lineList; gffLine != NULL; gffLine = gffLine->next) { if (sameWord(gffLine->feature, "intron")) { AllocVar(intron); intron->chrom = gffLine->seq; intron->chromStart = gffLine->start; intron->chromEnd = gffLine->end; intron->name = gffLine->intronId; intron->strand[0] = gffLine->strand; intron->strand[1] = 0; intron->status = gffLine->intronStatus; intron->transcript = gffLine->group; intron->geneId = gffLine->geneId; slAddHead(&intronList, intron); verbose(2, "%s %s\n", intron->chrom, intron->name); introns++; } } } slSort(&intronList, bedCmp); f = hgCreateTabFile(".", table); for (intron = intronList; intron != NULL; intron = intron->next) gencodeIntronTabOut(intron, f); carefulClose(&f); verbose(1, "%d introns in %d files\n", introns, gtfCount); hSetDb(database); conn = sqlConnect(database); gencodeIntronTableCreate(conn, table, hGetMinIndexLength()); hgLoadTabFile(conn, ".", table, &f); sqlDisconnect(&conn); }
void setupTable(char *database, struct sqlConnection *conn, char* table)
/* Make sure a psl table named table exists.  In append mode an existing
 * table is kept (after checkBinConsistent) and only created when missing;
 * otherwise the table is dropped and recreated. */
{
/* A tName index length is only passed when PSL_TNAMEIX is requested. */
int tNameIxLen = hGetMinIndexLength(database);
char *createSql;

if (!(pslCreateOpts & PSL_TNAMEIX))
    tNameIxLen = 0;
createSql = pslGetCreateSql(table, pslCreateOpts, tNameIxLen);

if (!append)
    sqlRemakeTable(conn, table, createSql);
else
    {
    checkBinConsistent(conn, table);
    sqlMaybeMakeTable(conn, table, createSql);
    }
freez(&createSql);
}
static void createFlatTbl(struct gbGeneTbl *ggt, struct sqlConnection* conn)
/* (Re)create the flat genePred table ggt->flatTbl.  The generated genePred
 * CREATE statement is edited so that a leading geneName column and a
 * trailing index on geneName are added. */
{
char flatSql[1024];
char *createSql = genePredGetCreateSql(ggt->flatTbl, 0, 0,
                                       hGetMinIndexLength(sqlGetDatabase(conn)));

/* Cut the statement at the first '(' : createSql keeps the text before it,
 * columns points at the column/index list.
 * NOTE(review): assumes the generated SQL always contains '(' and ')';
 * neither search result is checked for NULL. */
char *columns = strchr(createSql, '(');
*columns = '\0';
columns++;

/* Drop the final ')' so more definitions can be appended. */
char *closeParen = strrchr(columns, ')');
*closeParen = '\0';

safef(flatSql, sizeof(flatSql),
      "%s(geneName varchar(255) not null, %s, INDEX(geneName(10)))",
      createSql, columns);
freeMem(createSql);
sqlRemakeTable(conn, ggt->flatTbl, flatSql);
}
static void loadTables(char *hgDb, char *ccdsInfoTbl, char *ccdsInfoFile, char *ccdsGeneTbl, char *ccdsGeneFile, char *ccdsNotesTbl, char *ccdsNotesFile) /* load tables into database */ { struct sqlConnection *conn = sqlConnect(hgDb); /* create tables with _tmp extension, then rename after all5B are loaded */ // ccdsInfo char ccdsInfoTmpTbl[512], *ccdsInfoSql; safef (ccdsInfoTmpTbl, sizeof(ccdsInfoTmpTbl), "%s_tmp", ccdsInfoTbl); ccdsInfoSql = ccdsInfoGetCreateSql(ccdsInfoTmpTbl); sqlRemakeTable(conn, ccdsInfoTmpTbl, ccdsInfoSql); sqlLoadTabFile(conn, ccdsInfoFile, ccdsInfoTmpTbl, SQL_TAB_FILE_ON_SERVER); // ccdsNotes char ccdsNotesTmpTbl[512], *ccdsNotesSql; safef (ccdsNotesTmpTbl, sizeof(ccdsNotesTmpTbl), "%s_tmp", ccdsNotesTbl); ccdsNotesSql = ccdsNotesGetCreateSql(ccdsNotesTmpTbl); sqlRemakeTable(conn, ccdsNotesTmpTbl, ccdsNotesSql); sqlLoadTabFile(conn, ccdsNotesFile, ccdsNotesTmpTbl, SQL_TAB_FILE_ON_SERVER); // ccdsGene char ccdsGeneTmpTbl[512], *ccdsGeneSql; safef(ccdsGeneTmpTbl, sizeof(ccdsGeneTmpTbl), "%s_tmp", ccdsGeneTbl); ccdsGeneSql = genePredGetCreateSql(ccdsGeneTmpTbl, genePredAllFlds, genePredWithBin, hGetMinIndexLength(hgDb)); sqlRemakeTable(conn, ccdsGeneTmpTbl, ccdsGeneSql); freeMem(ccdsInfoSql); freeMem(ccdsGeneSql); sqlLoadTabFile(conn, ccdsGeneFile, ccdsGeneTmpTbl, SQL_TAB_FILE_ON_SERVER); ccdsRenameTable(conn, ccdsInfoTmpTbl, ccdsInfoTbl); ccdsRenameTable(conn, ccdsNotesTmpTbl, ccdsNotesTbl); ccdsRenameTable(conn, ccdsGeneTmpTbl, ccdsGeneTbl); if (!keep) { unlink(ccdsInfoFile); unlink(ccdsNotesFile); unlink(ccdsGeneFile); } sqlDisconnect(&conn); }
void hgLoadMafFrames(char *db, char *table, int numFramesFiles, char **framesFiles) /* load an mafFrames table */ { char tabFile[PATH_LEN], *createSql; struct sqlConnection *conn; safef(tabFile, sizeof(tabFile), "%s.tab", table); processFrameFiles(tabFile, numFramesFiles, framesFiles); /* create table */ conn = hAllocConn(db); createSql = mafFramesGetSql(table, 0, hGetMinIndexLength(db)); sqlRemakeTable(conn, table, createSql); freez(&createSql); sqlLoadTabFile(conn, tabFile, table, SQL_TAB_FILE_ON_SERVER); unlink(tabFile); hFreeConn(&conn); }
void hgLoadMafSummary(char *db, char *table, char *fileName) /* hgLoadMafSummary - Load a summary table of pairs in a maf into a database. */ { long mafCount = 0, allMafCount = 0; struct mafComp *mcMaster = NULL; struct mafAli *maf; struct mafFile *mf = mafOpen(fileName); struct sqlConnection *conn; FILE *f = hgCreateTabFile(".", table); long componentCount = 0; struct hash *componentHash = newHash(0); if (!test) { conn = sqlConnect(database); mafSummaryTableCreate(conn, table, hGetMinIndexLength(db)); } verbose(1, "Indexing and tabulating %s\n", fileName); /* process mafs */ while ((maf = mafNext(mf)) != NULL) { mcMaster = mafMaster(maf, mf, fileName); allMafCount++; if (mcMaster->srcSize < minSeqSize) continue; while (mcMaster->size > maxSize) { /* break maf into maxSize pieces */ int end = mcMaster->start + maxSize; struct mafAli *subMaf = mafSubset(maf, mcMaster->src, mcMaster->start, end); verbose(3, "Splitting maf %s:%d len %d\n", mcMaster->src, mcMaster->start, mcMaster->size); componentCount += processMaf(subMaf, componentHash, f, mf, fileName); mafAliFree(&subMaf); subMaf = mafSubset(maf, mcMaster->src, end, end + (mcMaster->size - maxSize)); mafAliFree(&maf); maf = subMaf; mcMaster = mafMaster(maf, mf, fileName); } if (mcMaster->size != 0) { /* remainder of maf after splitting off maxSize submafs */ componentCount += processMaf(maf, componentHash, f, mf, fileName); } mafAliFree(&maf); mafCount++; } mafFileFree(&mf); flushSummaryBlocks(componentHash, f); verbose(1, "Created %ld summary blocks from %ld components and %ld mafs from %s\n", summaryCount, componentCount, allMafCount, fileName); if (test) return; verbose(1, "Loading into %s table %s...\n", database, table); hgLoadTabFile(conn, ".", table, &f); verbose(1, "Loading complete"); hgEndUpdate(&conn, "Add %ld maf summary blocks from %s\n", summaryCount, fileName); }
void minIndexLength(char *database)
/* minIndexLength - report on stdout the index length that
 * hGetMinIndexLength() returns for database. */
{
printf("# database: %s, minIndexLength: %d\n",
       database, hGetMinIndexLength(database));
}
static void loadDatabase(char *database, char *track, int bedSize, struct bedStub *bedList) /* Load database from bedList. */ { struct sqlConnection *conn; struct dyString *dy = newDyString(1024); char *tab = (char *)NULL; int loadOptions = (optionExists("onServer") ? SQL_TAB_FILE_ON_SERVER : 0); if ( ! noLoad ) conn = sqlConnect(database); if ((char *)NULL != tmpDir) tab = cloneString(rTempName(tmpDir,"loadBed",".tab")); else tab = cloneString("bed.tab"); if (bedDetail && sqlTable == NULL) errAbort("bedDetail format requires sqlTable option"); if (bedDetail && !strictTab) errAbort("bedDetail format must be tab separated"); if (bedDetail && !noBin) noBin = TRUE; /* First make table definition. */ if (sqlTable != NULL && !oldTable) { /* Read from file. */ char *sql, *s; readInGulp(sqlTable, &sql, NULL); /* Chop off end-of-statement semicolon if need be. */ s = strchr(sql, ';'); if (s != NULL) *s = 0; if ( !noLoad ) { if (renameSqlTable) { char *pos = stringIn("CREATE TABLE ", sql); if (pos == NULL) errAbort("Can't find CREATE TABLE in %s\n", sqlTable); char *oldSql = cloneString(sql); nextWord(&pos); nextWord(&pos); char *tableName = nextWord(&pos); sql = replaceChars(oldSql, tableName, track); } verbose(1, "Creating table definition for %s\n", track); sqlRemakeTable(conn, track, sql); if (!noBin) addBinToEmptyTable(conn, track); adjustSqlTableColumns(conn, track, bedSize); } freez(&sql); } else if (!oldTable) { int minLength; if (noLoad) minLength=6; else if (maxChromNameLength) minLength = maxChromNameLength; else minLength = hGetMinIndexLength(database); verbose(2, "INDEX chrom length: %d\n", minLength); /* Create definition statement. 
*/ verbose(1, "Creating table definition for %s\n", track); dyStringPrintf(dy, "CREATE TABLE %s (\n", track); if (!noBin) dyStringAppend(dy, " bin smallint unsigned not null,\n"); dyStringAppend(dy, " chrom varchar(255) not null,\n"); dyStringAppend(dy, " chromStart int unsigned not null,\n"); dyStringAppend(dy, " chromEnd int unsigned not null,\n"); if (bedSize >= 4) maybeBedGraph(4, dy, " name varchar(255) not null,\n"); if (bedSize >= 5) { if (allowNegativeScores) maybeBedGraph(5, dy, " score int not null,\n"); else maybeBedGraph(5, dy, " score int unsigned not null,\n"); } if (bedSize >= 6) maybeBedGraph(6, dy, " strand char(1) not null,\n"); if (bedSize >= 7) maybeBedGraph(7, dy, " thickStart int unsigned not null,\n"); if (bedSize >= 8) maybeBedGraph(8, dy, " thickEnd int unsigned not null,\n"); /* As of 2004-11-22 the reserved field is used as itemRgb in code */ if (bedSize >= 9) maybeBedGraph(9, dy, " reserved int unsigned not null,\n"); if (bedSize >= 10) maybeBedGraph(10, dy, " blockCount int unsigned not null,\n"); if (bedSize >= 11) maybeBedGraph(11, dy, " blockSizes longblob not null,\n"); if (bedSize >= 12) maybeBedGraph(12, dy, " chromStarts longblob not null,\n"); if (bedSize >= 13) maybeBedGraph(13, dy, " expCount int unsigned not null,\n"); if (bedSize >= 14) maybeBedGraph(14, dy, " expIds longblob not null,\n"); if (bedSize >= 15) maybeBedGraph(15, dy, " expScores longblob not null,\n"); dyStringAppend(dy, "#Indices\n"); if (nameIx && (bedSize >= 4) && (0 == bedGraph)) dyStringAppend(dy, " INDEX(name(16)),\n"); if (noBin) { dyStringPrintf(dy, " INDEX(chrom(%d),chromStart)\n", minLength); } else { dyStringPrintf(dy, " INDEX(chrom(%d),bin)\n", minLength); } dyStringAppend(dy, ")\n"); if (noLoad) verbose(2,"%s", dy->string); else sqlRemakeTable(conn, track, dy->string); } verbose(1, "Saving %s\n", tab); writeBedTab(tab, bedList, bedSize); if ( ! 
noLoad ) { verbose(1, "Loading %s\n", database); if (customTrackLoader) sqlLoadTabFile(conn, tab, track, loadOptions|SQL_TAB_FILE_WARN_ON_WARN); else sqlLoadTabFile(conn, tab, track, loadOptions); if (! noHistory) { char comment[256]; /* add a comment to the history table and finish up connection */ safef(comment, sizeof(comment), "Add %d element(s) from bed list to %s table", slCount(bedList), track); hgHistoryComment(conn, comment); } if(fillInScoreColumn != NULL) { char query[500]; char buf[500]; struct sqlResult *sr; safef(query, sizeof(query), "select sum(score) from %s", track); if(sqlQuickQuery(conn, query, buf, sizeof(buf))) { unsigned sum = sqlUnsigned(buf); if (!sum) { safef(query, sizeof(query), "select min(%s), max(%s) from %s", fillInScoreColumn, fillInScoreColumn, track); if ((sr = sqlGetResult(conn, query)) != NULL) { char **row = sqlNextRow(sr); if(row != NULL) { float min = sqlFloat(row[0]); float max = sqlFloat(row[1]); if ( !(max == -1 && min == -1)) // if score is -1 then ignore, as if it werent present { if (max == min || sameString(row[0],row[1])) // this will lead to 'inf' score value in SQL update causing an error errAbort("Could not set score in table %s max(%s)=min(%s)=%s\n", track, fillInScoreColumn, fillInScoreColumn, row[0]); sqlFreeResult(&sr); // Calculate a, b s/t f(x) = ax + b maps min-max => minScore-1000 float a = (1000-minScore) / (max - min); float b = 1000 - ((1000-minScore) * max) / (max - min); safef(query, sizeof(query), "update %s set score = round((%f * %s) + %f)", track, a, fillInScoreColumn, b); int changed = sqlUpdateRows(conn, query, NULL); verbose(2, "update query: %s; changed: %d\n", query, changed); } else { sqlFreeResult(&sr); verbose(2, "score not updated; all values for column %s are -1\n", fillInScoreColumn); } } } } } } sqlDisconnect(&conn); /* if temp dir specified, unlink file to make it disappear */ if ((char *)NULL != tmpDir) unlink(tab); } else verbose(1, "No load option selected, see file: %s\n", tab); } 
/* static void loadDatabase() */
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 *   doLoad   - when FALSE only the tab file is written; nothing is loaded
 *   db       - database to load into
 *   track    - name of the table, tab file and .cgb file
 *   fileName - input file; read through the idTable option when it is given
 * With the minusLog10 option every value is replaced by -log10(value)
 * (1 becomes 0, values <= 0 are left alone) before min/max are taken.
 * Aborts if the input is empty. */
{
double minVal,maxVal;
struct chromGraph *el, *list;
FILE *f;
char *tempDir = ".";
char path[PATH_LEN], gbdbPath[PATH_LEN];
char *idTable = optionVal("idTable", NULL);
char *pathPrefix = NULL;

if (idTable == NULL)
    list = chromGraphLoadAll(fileName);
else
    list = chromGraphListWithTable(fileName, db, idTable);
if (list == NULL)
    errAbort("%s is empty", fileName);

/* Apply the optional -log10 transform to EVERY element, including the
 * first one, before any value is used for the min/max computation. */
if (optionExists("minusLog10"))
    {
    for (el = list; el != NULL; el = el->next)
        {
        if (el->val == 1)
            el->val = 0;
        else if (el->val > 0)
            el->val = -1 * log(el->val)/log(10);
        }
    }

/* Figure out min/max values */
minVal = maxVal = list->val;
for (el = list->next; el != NULL; el = el->next)
    {
    if (el->val < minVal)
        minVal = el->val;
    if (el->val > maxVal)
        maxVal = el->val;
    }

/* Sort and write out temp file. */
slSort(&list, chromGraphCmp);
f = hgCreateTabFile(tempDir, track);
for (el = list; el != NULL; el = el->next)
    chromGraphTabOut(el, f);

if (doLoad)
    {
    struct dyString *dy = dyStringNew(0);
    struct sqlConnection *conn;

    /* Set up connection to database and create main table. */
    conn = hAllocConn(db);
    sqlDyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
    sqlRemakeTable(conn, track, dy->string);

    /* Load main table and clean up file handle. */
    hgLoadTabFile(conn, tempDir, track, &f);
    hgRemoveTabFile(tempDir, track);

    /* If need be create meta table.  If need be delete old row. */
    if (!sqlTableExists(conn, "metaChromGraph"))
        sqlUpdate(conn, metaCreateString);
    else
        {
        dyStringClear(dy);
        sqlDyStringPrintf(dy, "delete from metaChromGraph where name = '%s'", track);
        sqlUpdate(conn, dy->string);
        }

    /* Make chrom graph file */
    safef(path, sizeof(path), "%s.cgb", track);
    chromGraphToBin(list, path);
    /* path is reused: it now holds the default for the pathPrefix option. */
    safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
    pathPrefix = optionVal("pathPrefix", path);
    safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

    /* Create new line in meta table */
    dyStringClear(dy);
    sqlDyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
                      track, minVal, maxVal, gbdbPath);
    sqlUpdate(conn, dy->string);
    }
}