示例#1
0
void loadOneTable(char *database, struct sqlConnection *conn, char *tempName, char *tableName)
/* Replace tableName with the contents of the tab file tempName: drop the
 * table if it is already there, create it from createRmskOut plus a
 * genoName/bin index, bulk-load the file, then delete the file. */
{
struct dyString *sql = newDyString(1024);

verbose(1, "Loading up table %s\n", tableName);

/* Get rid of any previous incarnation of the table. */
if (sqlTableExists(conn, tableName))
    {
    sqlDyStringPrintf(sql, "DROP table %s", tableName);
    sqlUpdate(conn, sql->string);
    }
dyStringClear(sql);

/* The CREATE statement is built in two pieces: the field definitions,
 * then the index clause that also closes the statement. */
sqlDyStringPrintf(sql, createRmskOut, tableName);
int genoNameIxLen = hGetMinIndexLength(database);
sqlDyStringPrintf(sql, "   INDEX(genoName(%d),bin))\n", genoNameIxLen);
sqlUpdate(conn, sql->string);
dyStringClear(sql);

/* Bulk load from the tab file, after which the file is no longer needed. */
sqlDyStringPrintf(sql, "LOAD data local infile '%s' into table %s",
                  tempName, tableName);
sqlUpdate(conn, sql->string);
remove(tempName);
}
示例#2
0
void setupTable(char *db, struct sqlConnection *conn, char *table)
/* Remake table as a genePred table.  The bin column is included when gBin
 * is set and the extended fields when gGenePredExt is set. */
{
unsigned fieldOpts = 0;
unsigned createOpts = 0;
char *createSql;

if (gGenePredExt)
    fieldOpts = genePredAllFlds;
if (gBin)
    createOpts = genePredWithBin;

createSql = genePredGetCreateSql(table, fieldOpts, createOpts, hGetMinIndexLength(db));
sqlRemakeTable(conn, table, createSql);
freez(&createSql);
}
示例#3
0
static void createGeneTbl(struct gbGeneTbl *ggt, struct sqlConnection* conn)
/* Remake ggt->tbl as a genePred table.  ggt->hasExtCols selects the extended
 * fields and ggt->hasBin the bin column; the index length is taken from the
 * database conn is attached to. */
{
unsigned fieldOpts = 0;
unsigned createOpts = 0;
int chromIxLen;
char *createSql;

if (ggt->hasExtCols)
    fieldOpts = genePredAllFlds;
if (ggt->hasBin)
    createOpts = genePredWithBin;
chromIxLen = hGetMinIndexLength(sqlGetDatabase(conn));

createSql = genePredGetCreateSql(ggt->tbl, fieldOpts, createOpts, chromIxLen);
sqlRemakeTable(conn, ggt->tbl, createSql);
freeMem(createSql);
}
示例#4
0
void ldGencodeIntron(char *database, char *table,  
                        int gtfCount, char *gtfNames[])
/* Load the Gencode intron status table.  Every "intron" feature found in
 * the given GTF files (which carry intron_id and intron_status keywords)
 * becomes one row; rows are sorted with bedCmp, written to a tab file in
 * the current directory and loaded into a newly created table. */
{
struct gencodeIntron *intronList = NULL, *intron;
struct sqlConnection *conn;
FILE *tabFile;
int intronCount = 0;
int fileIx;

/* Pass 1: gather intron records from all input files. */
for (fileIx = 0; fileIx < gtfCount; fileIx++)
    {
    struct gffFile *gff;
    struct gffLine *line;

    verbose(1, "Reading %s\n", gtfNames[fileIx]);
    gff = gffRead(gtfNames[fileIx]);
    for (line = gff->lineList; line != NULL; line = line->next)
        {
        if (!sameWord(line->feature, "intron"))
            continue;

        /* The record shares its string pointers with the gff line, so the
         * gff data has to stay around until the records are written out. */
        AllocVar(intron);
        intron->chrom = line->seq;
        intron->chromStart = line->start;
        intron->chromEnd = line->end;
        intron->name = line->intronId;
        intron->strand[0] = line->strand;
        intron->strand[1] = 0;
        intron->status = line->intronStatus;
        intron->transcript = line->group;
        intron->geneId = line->geneId;
        slAddHead(&intronList, intron);
        verbose(2, "%s %s\n", intron->chrom, intron->name);
        intronCount++;
        }
    }

/* Pass 2: sort and dump to the tab file. */
slSort(&intronList, bedCmp);
tabFile = hgCreateTabFile(".", table);
for (intron = intronList; intron != NULL; intron = intron->next)
    gencodeIntronTabOut(intron, tabFile);
carefulClose(&tabFile);

verbose(1, "%d introns in %d files\n", intronCount, gtfCount);

/* Pass 3: create the table and load the tab file into it.
 * NOTE(review): hGetMinIndexLength is called here without a database
 * argument (relying on hSetDb) - confirm this matches its declaration. */
hSetDb(database);
conn = sqlConnect(database);
gencodeIntronTableCreate(conn, table, hGetMinIndexLength());
hgLoadTabFile(conn, ".", table, &tabFile);
sqlDisconnect(&conn);
}
示例#5
0
void setupTable(char *database, struct sqlConnection *conn, char* table)
/* Get the psl table ready for loading.  In append mode the table is checked
 * with checkBinConsistent and only made if needed; otherwise it is remade.
 * The tName index length is passed on only when PSL_TNAMEIX is among the
 * create options, and is 0 otherwise. */
{
int chromIxLen = hGetMinIndexLength(database);
int tNameIxLen = 0;
char *createSql;

if (pslCreateOpts & PSL_TNAMEIX)
    tNameIxLen = chromIxLen;
createSql = pslGetCreateSql(table, pslCreateOpts, tNameIxLen);

if (!append)
    sqlRemakeTable(conn, table, createSql);
else
    {
    checkBinConsistent(conn, table);
    sqlMaybeMakeTable(conn, table, createSql);
    }
freez(&createSql);
}
示例#6
0
static void createFlatTbl(struct gbGeneTbl *ggt, struct sqlConnection* conn)
/* Remake ggt->flatTbl as a genePred table with an extra leading geneName
 * column and an index on it.  This is done by editing the text of the
 * generated genePred CREATE statement. */
{
char flatSql[1024];
int chromIxLen = hGetMinIndexLength(sqlGetDatabase(conn));
char *genePredSql = genePredGetCreateSql(ggt->flatTbl, 0, 0, chromIxLen);

/* Cut the statement in two at its first '(' and drop its last ')':
 * genePredSql is then the text before the parenthesis and columnDefs
 * the column/index list that was inside it. */
char *openParen = strchr(genePredSql, '(');
char *columnDefs = openParen + 1;
*openParen = '\0';
char *closeParen = strrchr(columnDefs, ')');
*closeParen = '\0';

/* Reassemble with geneName in front and its index at the end. */
safef(flatSql, sizeof(flatSql),
      "%s(geneName varchar(255) not null, %s, INDEX(geneName(10)))",
      genePredSql, columnDefs);
freeMem(genePredSql);
sqlRemakeTable(conn, ggt->flatTbl, flatSql);
}
示例#7
0
static void loadTables(char *hgDb,
                       char *ccdsInfoTbl, char *ccdsInfoFile,
                       char *ccdsGeneTbl, char *ccdsGeneFile,
                       char *ccdsNotesTbl, char *ccdsNotesFile)
/* Load the ccdsInfo, ccdsNotes and ccdsGene tab files into database hgDb.
 * Each file goes into a "<table>_tmp" table first; the temporary tables are
 * renamed to their final names only after all three have been loaded.
 * Unless the global keep is set, the tab files are unlinked afterwards. */
{
struct sqlConnection *conn = sqlConnect(hgDb);

/* create tables with _tmp extension, then rename after all are loaded */

// ccdsInfo
char ccdsInfoTmpTbl[512], *ccdsInfoSql;
safef (ccdsInfoTmpTbl, sizeof(ccdsInfoTmpTbl), "%s_tmp", ccdsInfoTbl);
ccdsInfoSql = ccdsInfoGetCreateSql(ccdsInfoTmpTbl);
sqlRemakeTable(conn, ccdsInfoTmpTbl, ccdsInfoSql);
freeMem(ccdsInfoSql);
sqlLoadTabFile(conn, ccdsInfoFile, ccdsInfoTmpTbl, SQL_TAB_FILE_ON_SERVER);

// ccdsNotes
char ccdsNotesTmpTbl[512], *ccdsNotesSql;
safef (ccdsNotesTmpTbl, sizeof(ccdsNotesTmpTbl), "%s_tmp", ccdsNotesTbl);
ccdsNotesSql = ccdsNotesGetCreateSql(ccdsNotesTmpTbl);
sqlRemakeTable(conn, ccdsNotesTmpTbl, ccdsNotesSql);
/* Released like the other two create statements (it used to be leaked). */
freeMem(ccdsNotesSql);
sqlLoadTabFile(conn, ccdsNotesFile, ccdsNotesTmpTbl, SQL_TAB_FILE_ON_SERVER);

// ccdsGene
char ccdsGeneTmpTbl[512], *ccdsGeneSql;
safef(ccdsGeneTmpTbl, sizeof(ccdsGeneTmpTbl), "%s_tmp", ccdsGeneTbl);
ccdsGeneSql = genePredGetCreateSql(ccdsGeneTmpTbl, genePredAllFlds,
                                   genePredWithBin, hGetMinIndexLength(hgDb));
sqlRemakeTable(conn, ccdsGeneTmpTbl, ccdsGeneSql);
freeMem(ccdsGeneSql);
sqlLoadTabFile(conn, ccdsGeneFile, ccdsGeneTmpTbl, SQL_TAB_FILE_ON_SERVER);

/* Everything is loaded: move the temporary tables into place. */
ccdsRenameTable(conn, ccdsInfoTmpTbl, ccdsInfoTbl);
ccdsRenameTable(conn, ccdsNotesTmpTbl, ccdsNotesTbl);
ccdsRenameTable(conn, ccdsGeneTmpTbl, ccdsGeneTbl);

if (!keep)
    {
    unlink(ccdsInfoFile);
    unlink(ccdsNotesFile);
    unlink(ccdsGeneFile);
    }
sqlDisconnect(&conn);
}
示例#8
0
void hgLoadMafFrames(char *db, char *table, int numFramesFiles, char **framesFiles)
/* Load an mafFrames table: hand the frames files and the "<table>.tab" path
 * to processFrameFiles, remake table in db, load the tab file into it and
 * then unlink the tab file. */
{
char tabPath[PATH_LEN];

safef(tabPath, sizeof(tabPath), "%s.tab", table);
processFrameFiles(tabPath, numFramesFiles, framesFiles);

/* Remake the table from the generated CREATE statement. */
struct sqlConnection *conn = hAllocConn(db);
char *mafFramesSql = mafFramesGetSql(table, 0, hGetMinIndexLength(db));
sqlRemakeTable(conn, table, mafFramesSql);
freez(&mafFramesSql);

/* Load, then clean up the tab file and the connection. */
sqlLoadTabFile(conn, tabPath, table, SQL_TAB_FILE_ON_SERVER);
unlink(tabPath);
hFreeConn(&conn);
}
void hgLoadMafSummary(char *db, char *table, char *fileName)
/* hgLoadMafSummary - Load a summary table of pairs in a maf into a database.
 *
 * Reads the alignments of fileName one at a time.  Alignments whose master
 * component has srcSize below minSeqSize are skipped; alignments longer than
 * maxSize are processed in maxSize pieces.  The summary rows are written to
 * a tab file in the current directory and, unless the global test is set,
 * loaded into table of db.  In test mode no database connection is made and
 * the function returns after the tab file contents have been produced. */
{
long allMafCount = 0;
struct mafComp *mcMaster = NULL;
struct mafAli *maf;
struct mafFile *mf = mafOpen(fileName);
struct sqlConnection *conn = NULL;   /* stays NULL in test mode */
FILE *f = hgCreateTabFile(".", table);
long componentCount = 0;
struct hash *componentHash = newHash(0);

if (!test)
    {
    /* Connect to the same database the index length is computed for. */
    conn = sqlConnect(db);
    mafSummaryTableCreate(conn, table, hGetMinIndexLength(db));
    }
verbose(1, "Indexing and tabulating %s\n", fileName);

/* process mafs */
while ((maf = mafNext(mf)) != NULL)
    {
    mcMaster = mafMaster(maf, mf, fileName);
    allMafCount++;
    if (mcMaster->srcSize < minSeqSize)
        {
        /* Skipped alignments still have to be released. */
        mafAliFree(&maf);
        continue;
        }
    while (mcMaster->size > maxSize)
        {
        /* break maf into maxSize pieces */
        int end = mcMaster->start + maxSize;
        struct mafAli *subMaf = 
                mafSubset(maf, mcMaster->src, mcMaster->start, end);
        verbose(3, "Splitting maf %s:%d len %d\n", mcMaster->src,
                                        mcMaster->start, mcMaster->size);
        componentCount += 
            processMaf(subMaf, componentHash, f, mf, fileName);
        mafAliFree(&subMaf);
        /* Continue with whatever lies beyond the piece just processed. */
        subMaf = mafSubset(maf, mcMaster->src, 
                                end, end + (mcMaster->size - maxSize));
        mafAliFree(&maf);
        maf = subMaf;
        mcMaster = mafMaster(maf, mf, fileName);
        }
    if (mcMaster->size != 0)
        {
        /* remainder of maf after splitting off maxSize submafs */
        componentCount += 
            processMaf(maf, componentHash, f, mf, fileName);
        }
    mafAliFree(&maf);
    }
mafFileFree(&mf);
flushSummaryBlocks(componentHash, f);
verbose(1, 
    "Created %ld summary blocks from %ld components and %ld mafs from %s\n",
        summaryCount, componentCount, allMafCount, fileName);
/* NOTE(review): in test mode the tab file handle f is not closed here. */
if (test)
    return;
verbose(1, "Loading into %s table %s...\n", db, table);
hgLoadTabFile(conn, ".", table, &f);
verbose(1, "Loading complete\n");
hgEndUpdate(&conn, "Add %ld maf summary blocks from %s\n", 
                        summaryCount, fileName);
}
void minIndexLength(char *database)
/* minIndexLength - report the chrom index length that hGetMinIndexLength
 * returns for database, as a single '#' line on stdout. */
{
    printf("# database: %s, minIndexLength: %d\n",
           database, hGetMinIndexLength(database));
}
static void loadDatabase(char *database, char *track, int bedSize, struct bedStub *bedList)
/* Write bedList to a tab file and, unless the global noLoad is set, create
 * table track in database and load the tab file into it.
 *
 * The table definition comes from one of:
 *   - the sqlTable file, with its table name replaced by track when
 *     renameSqlTable is set;
 *   - a CREATE statement generated here from bedSize and the bin/index
 *     options;
 *   - nothing at all when oldTable is set (no table is created here).
 * After loading, a history comment is added unless noHistory is set.  If
 * fillInScoreColumn is set and sum(score) is zero, score is filled in by
 * linearly rescaling that column onto the range minScore..1000. */
{
struct sqlConnection *conn = NULL;   /* stays NULL when noLoad is set */
struct dyString *dy = newDyString(1024);
char *tab = (char *)NULL;
int loadOptions = (optionExists("onServer") ? SQL_TAB_FILE_ON_SERVER : 0);

if ( ! noLoad )
    conn = sqlConnect(database);

if ((char *)NULL != tmpDir)
    tab = cloneString(rTempName(tmpDir,"loadBed",".tab"));
else
    tab = cloneString("bed.tab");

if (bedDetail && sqlTable == NULL) 
    errAbort("bedDetail format requires sqlTable option");
if (bedDetail && !strictTab) 
    errAbort("bedDetail format must be tab separated");
if (bedDetail && !noBin) 
    noBin = TRUE;

/* First make table definition. */
if (sqlTable != NULL && !oldTable)
    {
    /* Read from file. */
    char *sql, *s;
    readInGulp(sqlTable, &sql, NULL);
    /* Chop off end-of-statement semicolon if need be. */
    s = strchr(sql, ';');
    if (s != NULL) *s = 0;
    
    if ( !noLoad )
        {
        if (renameSqlTable)
            {
            char *pos = stringIn("CREATE TABLE ", sql);
            if (pos == NULL)
                errAbort("Can't find CREATE TABLE in %s\n", sqlTable);
            /* nextWord() is applied to sql itself, so keep a pristine copy
             * to do the replacement on. */
            char *oldSql = cloneString(sql);
            nextWord(&pos); nextWord(&pos);
            char *tableName = nextWord(&pos);
            char *renamedSql = replaceChars(oldSql, tableName, track);
            /* tableName points into sql, so both buffers can only be
             * released once the replacement has been made. */
            freez(&oldSql);
            freez(&sql);
            sql = renamedSql;
            }
        verbose(1, "Creating table definition for %s\n", track);
        sqlRemakeTable(conn, track, sql);
        if (!noBin) 
            addBinToEmptyTable(conn, track);
        adjustSqlTableColumns(conn, track, bedSize);
        }
    
    freez(&sql);
    }
else if (!oldTable)
    {
    int minLength;

    if (noLoad)
        minLength=6;
    else if (maxChromNameLength)
        minLength = maxChromNameLength;
    else
        minLength = hGetMinIndexLength(database);
    verbose(2, "INDEX chrom length: %d\n", minLength);

    /* Create definition statement. */
    verbose(1, "Creating table definition for %s\n", track);
    dyStringPrintf(dy, "CREATE TABLE %s (\n", track);
    if (!noBin)
       dyStringAppend(dy, "  bin smallint unsigned not null,\n");
    dyStringAppend(dy, "  chrom varchar(255) not null,\n");
    dyStringAppend(dy, "  chromStart int unsigned not null,\n");
    dyStringAppend(dy, "  chromEnd int unsigned not null,\n");
    if (bedSize >= 4)
       maybeBedGraph(4, dy, "  name varchar(255) not null,\n");
    if (bedSize >= 5)
        {
        if (allowNegativeScores)
            maybeBedGraph(5, dy, "  score int not null,\n");
        else
            maybeBedGraph(5, dy, "  score int unsigned not null,\n");
        }
    if (bedSize >= 6)
       maybeBedGraph(6, dy, "  strand char(1) not null,\n");
    if (bedSize >= 7)
       maybeBedGraph(7, dy, "  thickStart int unsigned not null,\n");
    if (bedSize >= 8)
       maybeBedGraph(8, dy, "  thickEnd int unsigned not null,\n");
    /*	As of 2004-11-22 the reserved field is used as itemRgb in code */
    if (bedSize >= 9)
       maybeBedGraph(9, dy, "  reserved int unsigned  not null,\n");
    if (bedSize >= 10)
       maybeBedGraph(10, dy, "  blockCount int unsigned not null,\n");
    if (bedSize >= 11)
       maybeBedGraph(11, dy, "  blockSizes longblob not null,\n");
    if (bedSize >= 12)
       maybeBedGraph(12, dy, "  chromStarts longblob not null,\n");
    if (bedSize >= 13)
       maybeBedGraph(13, dy, "  expCount int unsigned not null,\n");
    if (bedSize >= 14)
       maybeBedGraph(14, dy, "  expIds longblob not null,\n");
    if (bedSize >= 15)
       maybeBedGraph(15, dy, "  expScores longblob not null,\n");
    dyStringAppend(dy, "#Indices\n");
    if (nameIx && (bedSize >= 4) && (0 == bedGraph))
       dyStringAppend(dy, "  INDEX(name(16)),\n");
    if (noBin)
        {
        dyStringPrintf(dy, "  INDEX(chrom(%d),chromStart)\n", minLength);
        }
    else
        {
        dyStringPrintf(dy, "  INDEX(chrom(%d),bin)\n", minLength);
        }
    dyStringAppend(dy, ")\n");
    if (noLoad)
        verbose(2,"%s", dy->string);
    else
        sqlRemakeTable(conn, track, dy->string);
    }

verbose(1, "Saving %s\n", tab);
writeBedTab(tab, bedList, bedSize);

if ( ! noLoad )
    {
    verbose(1, "Loading %s\n", database);
    if (customTrackLoader)
        sqlLoadTabFile(conn, tab, track, loadOptions|SQL_TAB_FILE_WARN_ON_WARN);
    else
        sqlLoadTabFile(conn, tab, track, loadOptions);

    if (! noHistory)
        {
        char comment[256];
        /* add a comment to the history table and finish up connection */
        safef(comment, sizeof(comment),
            "Add %d element(s) from bed list to %s table",
                slCount(bedList), track);
        hgHistoryComment(conn, comment);
        }
    if(fillInScoreColumn != NULL)
        {
        char query[500];
        char buf[500];
        struct sqlResult *sr;
        safef(query, sizeof(query), "select sum(score) from %s", track);
        if(sqlQuickQuery(conn, query, buf, sizeof(buf)))
            {
            unsigned sum = sqlUnsigned(buf);
            /* Only fill in scores when none were loaded from the file. */
            if (!sum)
                {
                safef(query, sizeof(query), "select min(%s), max(%s) from %s", fillInScoreColumn, fillInScoreColumn, track);
                if ((sr = sqlGetResult(conn, query)) != NULL)
                    {
                    char **row = sqlNextRow(sr);
                    if(row != NULL)
                        {
                        float min = sqlFloat(row[0]);
                        float max = sqlFloat(row[1]);
                        if ( !(max == -1 && min == -1)) // if score is -1 then ignore, as if it werent present
                            {
                            if (max == min || sameString(row[0],row[1])) // this will lead to 'inf' score value in SQL update causing an error
                                errAbort("Could not set score in table %s max(%s)=min(%s)=%s\n", track, fillInScoreColumn, fillInScoreColumn, row[0]);
                            /* row is not used past this point, and the
                             * result is released before the update runs */
                            sqlFreeResult(&sr);

                            // Calculate a, b s/t f(x) = ax + b maps min-max => minScore-1000
                            float a = (1000-minScore) / (max - min);
                            float b = 1000 - ((1000-minScore) * max) / (max - min);

                            safef(query, sizeof(query), "update %s set score = round((%f * %s) + %f)",  track, a, fillInScoreColumn, b);
                            int changed = sqlUpdateRows(conn, query, NULL);
                            verbose(2, "update query: %s; changed: %d\n", query, changed);
                            }
                        else
                            {
                            sqlFreeResult(&sr);
                            verbose(2, "score not updated; all values for column %s are -1\n", fillInScoreColumn);
                            }
                        }
                    else
                        {
                        /* No row came back: still release the result. */
                        sqlFreeResult(&sr);
                        }
                    }
                }
            }

        }
    sqlDisconnect(&conn);
    /*	if temp dir specified, unlink file to make it disappear */
    if ((char *)NULL != tmpDir)
        unlink(tab);
    }
else
    verbose(1, "No load option selected, see file: %s\n", tab);

}	/*	static void loadDatabase()	*/
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 *
 * Reads the graph values from fileName (through idTable when the idTable
 * option is given) and aborts if there are none.  With the minusLog10
 * option every value is converted first: 1 becomes 0, other positive values
 * become -log10(value), and values <= 0 are left alone.  The sorted values
 * are written to a tab file in the current directory.  When doLoad is set
 * the tab file is loaded into table track of db, the list is handed to
 * chromGraphToBin for "<track>.cgb", and a row with the value range and
 * the gbdb path is put into metaChromGraph, replacing any previous row for
 * track. */
{
    double minVal,maxVal;
    struct chromGraph *el, *list;
    FILE *f;
    char *tempDir = ".";
    char path[PATH_LEN], gbdbPath[PATH_LEN];
    char *idTable = optionVal("idTable", NULL);
    char *pathPrefix = NULL;
    boolean minusLog10 = optionExists("minusLog10");

    if (idTable == NULL)
        list = chromGraphLoadAll(fileName);
    else
        list = chromGraphListWithTable(fileName, db, idTable);
    if (list == NULL)
        errAbort("%s is empty", fileName);

    /* Convert before measuring the range, and start at the head of the
     * list so that the first element is converted like all the others. */
    if (minusLog10)
    {
        for (el = list; el != NULL; el = el->next)
        {
            if (el->val == 1)
                el->val = 0;
            else if (el->val > 0)
                el->val = -1 * log(el->val)/log(10);
        }
    }

    /* Figure out min/max values */
    minVal = maxVal = list->val;
    for (el = list->next; el != NULL; el = el->next)
    {
        if (el->val < minVal)
            minVal = el->val;
        if (el->val > maxVal)
            maxVal = el->val;
    }


    /* Sort and write out temp file. */
    slSort(&list, chromGraphCmp);
    f = hgCreateTabFile(tempDir, track);
    for (el = list; el != NULL; el = el->next)
        chromGraphTabOut(el, f);

    /* NOTE(review): when doLoad is false nothing below closes f, and in the
     * load path conn and dy are not released - confirm this is intended. */
    if (doLoad)
    {
        struct dyString *dy = dyStringNew(0);
        struct sqlConnection *conn;

        /* Set up connection to database and create main table. */
        conn = hAllocConn(db);
        sqlDyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
        sqlRemakeTable(conn, track, dy->string);

        /* Load main table and clean up file handle. */
        hgLoadTabFile(conn, tempDir, track, &f);
        hgRemoveTabFile(tempDir, track);

        /* If need be create meta table.  If need be delete old row. */
        if (!sqlTableExists(conn, "metaChromGraph"))
            sqlUpdate(conn, metaCreateString);
        else
        {
            dyStringClear(dy);
            sqlDyStringPrintf(dy, "delete from metaChromGraph where name = '%s'",
                              track);
            sqlUpdate(conn, dy->string);
        }

        /* Make chrom graph file.  path is used first for the local .cgb
         * file name, then reused for the default gbdb directory. */
        safef(path, sizeof(path), "%s.cgb", track);
        chromGraphToBin(list, path);
        safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
        pathPrefix = optionVal("pathPrefix", path);
        safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

        /* Create new line in meta table */
        dyStringClear(dy);
        sqlDyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
                          track, minVal, maxVal, gbdbPath);
        sqlUpdate(conn, dy->string);
    }
}