void relationRecon(char *fileName)
/* Do relationship based reconstruction.
 * For each line number below the row count of the lineSize table, select
 * that line's rows from lineWords, use the second column of each row
 * (row[1]) as an index into the array returned by loadWords(), and pass the
 * word to fileOutput() together with the file opened on fileName.
 * Prints the time taken to load the words and to reconstruct the file. */
{
char **words;
long start, end;
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char **row;
FILE *f = mustOpen(fileName, "w");
int lineCount;
int i;
char query[256];

start = clock1000();
words = loadWords();
end = clock1000();
printf("Time to load words: %4.3f\n", 0.001*(end-start));
start = clock1000();

lineCount = sqlTableSize(conn, "lineSize");
for (i=0; i<lineCount; ++i)
    {
    /* Bounded formatting so the query can never overrun the buffer. */
    snprintf(query, sizeof(query), "select * from lineWords where line = %d", i);
    sr = sqlQuery(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        fileOutput(f,words[sqlUnsigned(row[1])]);
    sqlFreeResult(&sr);
    }
end = clock1000();
printf("Time to relation reconstruct file: %4.3f\n", 0.001*(end-start));

/* Close the output file so buffered data is flushed and the handle is not
 * leaked.  Done after the timing printout so the reported time covers the
 * same work as before. */
carefulClose(&f);
sqlDisconnect(&conn);
/* NOTE(review): words is not released here; how loadWords() allocates it is
 * not visible from this function - confirm whether it needs freeing. */
}
Exemple #2
0
int tableSize(char *db, char *table)
/* Connect to db, look up how many rows table holds, disconnect again and
 * return that row count. */
{
int rowCount;
struct sqlConnection *dbConn;

dbConn = sqlConnect(db);
rowCount = sqlTableSize(dbConn, table);
sqlDisconnect(&dbConn);
return rowCount;
}
Exemple #3
0
void addRowcount()
/* Get the size of each table: walk tableList (declared outside this
 * function) and store the result of sqlTableSize() in each entry's
 * rowCount field. */
{
struct table *table1 = NULL;
struct sqlConnection *conn = hAllocConn(database);

for (table1 = tableList; table1 != NULL; table1 = table1->next)
    table1->rowCount = sqlTableSize(conn, table1->name);

/* Give the connection back; it was previously left allocated. */
hFreeConn(&conn);
verbose(1, "done with rowCount lookup\n");
}
Exemple #4
0
static void describeFieldsInfoLink(char *db, char *table, char *actionVar,
                                   char *field, char *label)
/* Print an <A HREF> link whose URL starts with getScriptName(), carries the
 * cart session string plus the db and table, and sets actionVar to field.
 * label is printed as the link text. */
{
hPrintf("<A HREF=\"%s", getScriptName());
hPrintf("?%s", cartSidUrlString(cart));
hPrintf("&%s=%s", hgtaDatabase, db);
hPrintf("&%s=%s", hgtaHistoTable, table);
hPrintf("&%s=%s", actionVar, field);
hPrintf("\">");
hPrintf("%s", label);
hPrintf("</A>");
}

void describeFields(char *db, char *table,
                    struct asObject *asObj, struct sqlConnection *conn)
/* Print out an HTML table showing table fields and types, and optionally
 * offering histograms for the text/enum fields.
 *
 * One HTML row is printed per row of "describe <table>".  Columns:
 *   field        - the field name (shown as itemRgb instead of "reserved"
 *                  when bedItemRgb() says so for this table's track)
 *   example      - only if the "limit 1" example query returned something
 *   SQL type     - the type text from describe
 *   info         - "values"/"range" links; omitted entirely when the table
 *                  has more than TOO_BIG_FOR_HISTO rows
 *   description  - only if asObj is non-NULL
 * Uses cart and curTrack, which are declared outside this function. */
{
struct sqlResult *sr;
char **row;
#define TOO_BIG_FOR_HISTO 500000
boolean tooBig = (sqlTableSize(conn, table) > TOO_BIG_FOR_HISTO);
char query[256];
struct slName *exampleList, *example;
boolean showItemRgb = FALSE;

showItemRgb=bedItemRgb(findTdbForTable(db, curTrack, table, ctLookupName));
// should we expect itemRgb instead of "reserved"

sqlSafef(query, sizeof(query), "select * from %s limit 1", table);
exampleList = storeRow(conn, query);
sqlSafef(query, sizeof(query), "describe %s", table);
sr = sqlGetResult(conn, query);

hTableStart();
hPrintf("<TR><TH>field</TH>");
if (exampleList != NULL)
    hPrintf("<TH>example</TH>");
hPrintf("<TH>SQL type</TH> ");
if (!tooBig)
    hPrintf("<TH>info</TH> ");
if (asObj != NULL)
    hPrintf("<TH>description</TH> ");
puts("</TR>\n");

/* example walks exampleList in step with the describe rows, one element
 * per field; it may run out first, in which case "n/a" is shown. */
example = exampleList;
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (showItemRgb && (sameWord(row[0],"reserved")))
        hPrintf("<TR><TD><TT>itemRgb</TT></TD> ");
    else
        hPrintf("<TR><TD><TT>%s</TT></TD> ", row[0]);
    if (exampleList != NULL)
        {
        hPrintf("<TD>");
        if (example != NULL)
            hPrintf("%s", cleanExample(example->name));
        else
            hPrintf("n/a");
        hPrintf("</TD>");
        }
    // enums/sets with many items can make for painfully wide rows in the table --
    // add spaces between quoted list values:
    if (stringIn("','", row[1]))
        {
        struct dyString *spaced = dyStringSub(row[1], "','", "', '");
        hPrintf("<TD><TT>%s</TT></TD>", spaced->string);
        /* Release the substituted copy; it was previously leaked per row. */
        dyStringFree(&spaced);
        }
    else
        hPrintf("<TD><TT>%s</TT></TD>", row[1]);
    if (!tooBig)
        {
        hPrintf(" <TD>");
        if ((isSqlStringType(row[1]) && !sameString(row[1], "longblob")) ||
            isSqlEnumType(row[1]) || isSqlSetType(row[1]))
            describeFieldsInfoLink(db, table, hgtaDoValueHistogram, row[0], "values");
        else if (isSqlNumType(row[1]))
            describeFieldsInfoLink(db, table, hgtaDoValueRange, row[0], "range");
        else
            hPrintf("&nbsp;");
        hPrintf("</TD>");
        }
    if (asObj != NULL)
        {
        struct asColumn *asCol = asColumnFind(asObj, row[0]);
        hPrintf(" <TD>");
        if (asCol != NULL)
            hPrintf("%s", asCol->comment);
        else if (sameString("bin", row[0]))
            hPrintf("Indexing field to speed chromosome range queries.");
        else
            hPrintf("&nbsp;");
        hPrintf("</TD>");
        }
    puts("</TR>");
    if (example != NULL)
        example = example->next;
    }
hTableEnd();
sqlFreeResult(&sr);
/* The example row is no longer needed once the table has been printed. */
slFreeList(&exampleList);
}
boolean cleanTable(char *table)
/* Clean a specific table.
 *
 * Reads every id from the table into an array, works out which index ranges
 * of that array to clean - either the manual range given by the purgeStart
 * and purgeEnd options, or several automatically chosen ranges - and passes
 * the ids at each range's ends to cleanTableSection(), unless the dryRun
 * option is present.
 *
 * Returns FALSE if the table has no rows; otherwise returns the result of
 * checkMaxTableSizeExceeded(table).
 * Uses conn, purgeStart and purgeEnd, which are declared outside this
 * function.
 *
 * NOTE(review): the range of values binaryIdSearch() can return is not
 * visible here.  Its results are used directly as indexes into ids[], so if
 * it can return -1 or totalRows those reads are out of bounds - confirm. */
{
struct sqlResult *sr;
char **row;
char query[256];
int *ids;
int totalRows = 0;
boolean squealed = FALSE;
boolean dryRun = optionExists("dryRun");
time_t cleanStart = time(NULL);

verbose(1, "-------------------\n");
verbose(1, "Cleaning table %s\n", table);
verbose(1, "%s\n", ctime(&cleanStart));

totalRows = sqlTableSize(conn, table);
verbose(1,"totalRows=%d\n", totalRows);

if (totalRows==0)
    {
    verbose(1,"table %s is empty!", table);
    return FALSE;
    }

AllocArray(ids, totalRows);

// This is a super-fast query because it only needs to read the index which is cached in memory.
sqlSafef(query,sizeof(query), "select id from %s" , table);
sr = sqlGetResult(conn, query);
int i = 0;
while ((row = sqlNextRow(sr)) != NULL)
    {
    ids[i++] = sqlUnsigned(row[0]);
    if (i >= totalRows)   // never write past the array sized from the earlier count
        break;
    }
sqlFreeResult(&sr);
totalRows = i;  // in case they differed.

if (totalRows == 0)
    {
    // The size lookup reported rows but the select returned none; there is
    // nothing to search or index, so treat it the same as an empty table.
    verbose(1,"table %s is empty!", table);
    freeMem(ids);
    return FALSE;
    }

if (optionExists("purgeStart"))   // manual purge range specified
    {
    int purgeRangeStart = -1;
    int purgeRangeEnd = -1;
    purgeStart = optionInt("purgeStart", -1);
    purgeEnd = optionInt("purgeEnd", -1);
    if (purgeStart < 1 || purgeStart > 720)
        errAbort("Invalid purgeStart");
    if (purgeEnd < 0)
        purgeEnd = 0;
    if (purgeStart < purgeEnd)
        errAbort("purgeStart should be greater than purgeEnd (in days ago)");
    purgeRangeStart = binaryIdSearch(ids, totalRows, table, purgeStart);
    purgeRangeEnd   = binaryIdSearch(ids, totalRows, table, purgeEnd);
    verbose(1, "manual purge range: purgeStart %d purgeEnd %d rangeStart %d rangeEnd %d rangeSize=%d ids[rs]=%d\n", 
                                    purgeStart,   purgeEnd, purgeRangeStart, purgeRangeEnd, purgeRangeEnd-purgeRangeStart, ids[purgeRangeStart]);
    if (!dryRun)
        cleanTableSection(table, ids[purgeRangeStart], ids[purgeRangeEnd]);
    }
else  // figure out purge-ranges automatically
    {
    int firstUseAge = 0;
    if (sameString(table, sessionDbTableName))
        firstUseAge = 14;
    if (sameString(table, userDbTableName))
        firstUseAge = 365;

    int day = sqlQuickNum(conn, NOSQLINJ "select dayofweek(now())");

    // These old records take a long time to go through, 5k sessionDb to 55k userDb old recs to look at,
    //  and typically produce only a few hundred deletions.
    //  they are growing slowly and expire rarely, so we don't need to scan them
    //  frequently and aggressively.  So ONLY scan them once per week by doing 1/7 per day.
    // Also don't need to worry much about the 
    //  borders of the split-over-7-days divisions shifting much because the set is so nearly static.  YAWN.
    int firstUseIndex = binaryIdSearch(ids, totalRows, table, firstUseAge);
    int oldRangeSize = (firstUseIndex - 0) / 7;
    int oldRangeStart = oldRangeSize * (day-1);
    int oldRangeEnd = oldRangeStart + oldRangeSize;
    verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d day %d: rangeStart %d rangeEnd %d rangeSize=%d ids[oldRangeStart]=%d\n", 
        firstUseAge, firstUseIndex, day, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[oldRangeStart]);

    // newly old can be expected to have some delete action
    //  these records have newly crossed the threshold into being old enough to have possibly expired.
    int newOldRangeStart = firstUseIndex;
    int newOldRangeEnd = binaryIdSearch(ids, totalRows, table, firstUseAge - 1);
    verbose(1, "newOld cleaner: firstUseAge=%d rangeStart %d rangeEnd %d rangeSize=%d ids[newOldRangeStart]=%d\n", 
        firstUseAge, newOldRangeStart, newOldRangeEnd, newOldRangeEnd-newOldRangeStart, ids[newOldRangeStart]);

    // this is the main delete action of cleaning out new robots (20k to 50k or more)
    int robo1RangeStart = binaryIdSearch(ids, totalRows, table, 2);
    int robo1RangeEnd   = binaryIdSearch(ids, totalRows, table, 1);
    verbose(1, "robot cleaner1: twoDayIndex = %d oneDayIndex %d rangeSize=%d ids[rs]=%d\n", 
      robo1RangeStart, robo1RangeEnd, robo1RangeEnd-robo1RangeStart, ids[robo1RangeStart]);

    int robo2RangeStart = -1;
    int robo2RangeEnd = -1;
    if (sameString(table, userDbTableName))
        {  // secondary robot cleaning only for userDb., produces a somewhat lesser, perhaps 3 to 5k deletions
        robo2RangeStart = binaryIdSearch(ids, totalRows, table, 7);
        robo2RangeEnd   = binaryIdSearch(ids, totalRows, table, 6);
        verbose(1, "robot cleaner2: sevenDayIndex = %d sixDayIndex %d rangeSize=%d ids[rs]=%d\n", 
          robo2RangeStart, robo2RangeEnd, robo2RangeEnd-robo2RangeStart, ids[robo2RangeStart]);
        }

    /* cannot clean until we have all the ranges determined since deleting messes up binSearch */
    if (!dryRun)
        {
        verbose(1, "old cleaner:\n");
        cleanTableSection(table, ids[oldRangeStart], ids[oldRangeEnd]);

        verbose(1, "newOld cleaner:\n");
        cleanTableSection(table, ids[newOldRangeStart], ids[newOldRangeEnd]);

        verbose(1, "robot cleaner1:\n");
        cleanTableSection(table, ids[robo1RangeStart], ids[robo1RangeEnd]);

        if (sameString(table, userDbTableName))
            {
            verbose(1, "robot cleaner2:\n");
            cleanTableSection(table, ids[robo2RangeStart], ids[robo2RangeEnd]);
            }
        }
    }

/* The id array is only needed while choosing and cleaning ranges. */
freeMem(ids);

// TODO: may need to pass row and deletion counts back from the
// cleanTableSection call so they can be reported here.

time_t cleanEnd = time(NULL);
int minutes = difftime(cleanEnd, cleanStart) / 60; 
verbose(1, "%s\n", ctime(&cleanEnd));
verbose(1, "%d minutes total\n\n", minutes);

squealed = checkMaxTableSizeExceeded(table);

return squealed;

}
void findToFixBedGraphLimits(char *input, char *output)
/* findToFixBedGraphLimits - Scan through ra file of bedGraphs and calculate limits.
 * For every record read from input: take its db, track and type values,
 * find the track column named by the number in the type value, scan that
 * column for its smallest and largest value, then write the record's
 * name/value pairs to output followed by minLimit and maxLimit lines and a
 * blank line.  Aborts on a malformed type value, a track with too few
 * fields, or a track with no rows. */
{
struct lineFile *lf = lineFileOpen(input, TRUE);
FILE *f = mustOpen(output, "w");
struct slPair *el, *list;
while ((list = raNextRecordAsSlPairList(lf)) != NULL)
    {
    /* Find required fields for calcs. */
    char *db = mustFindVal(list, "db", lf);
    char *track = mustFindVal(list, "track", lf);
    char *type = cloneString(mustFindVal(list, "type", lf));

    /* Parse out type value, which should be "bedGraph 4" and put the 4 or whatever other number
     * in dataFieldIndex. */
    char *typeWords[3];
    int typeWordCount = chopLine(type, typeWords);
    if (typeWordCount != 2 || !sameString(typeWords[0], "bedGraph"))
           errAbort("Not well formed bedGraph type line %d of %s", lf->lineIx, lf->fileName);
    int dataFieldIndex = sqlUnsigned(typeWords[1]);
    /* typeWords are taken from the cloned string, which is not needed once
     * the index is parsed; it was previously leaked on every record. */
    freeMem(type);

    /* Figure out field corresponding to dataFieldIndex. */
    struct sqlConnection *conn = sqlConnect(db);
    struct slName *fieldList = sqlFieldNames(conn, track);
    struct slName *pastBin = fieldList;
    /* Guard against an empty field list before looking at the first name;
     * a NULL list then falls through to the "not enough fields" abort. */
    if (pastBin != NULL && sameString(pastBin->name, "bin"))
         pastBin = pastBin->next;
    struct slName *fieldName = slElementFromIx(pastBin, dataFieldIndex - 1);
    if (fieldName == NULL)
         errAbort("%s doesn't have enough fields", track);
    char *field = fieldName->name;
    assert(sqlFieldIndex(conn, track, field) >= 0);

    /* Print reassuring status message */
    verbose(1, "%s.%s has %d elements.  Data field is %s\n", db, track, sqlTableSize(conn, track), field);
         
    /* Get min/max dataValues in fields.  Do it ourselves rather than using SQL min/max because sometimes
     * the data field is a name column.... */
    char query[512];
    /* field and track originate from the input file, so build the query
     * with sqlSafef rather than plain safef. */
    sqlSafef(query, sizeof(query), "select %s from %s", field, track);
    struct sqlResult *sr = sqlGetResult(conn, query);
    char **row = sqlNextRow(sr);
    /* An explicit abort rather than assert(): assert disappears under
     * NDEBUG, which would leave a NULL dereference on an empty table. */
    if (row == NULL)
        errAbort("%s.%s has no rows", db, track);
    double minLimit = sqlDouble(row[0]);
    double maxLimit = minLimit;
    while ((row = sqlNextRow(sr)) != NULL)
        {
        double val = sqlDouble(row[0]);
        if (val < minLimit) minLimit = val;
        if (val > maxLimit) maxLimit = val;
        }
    sqlFreeResult(&sr);
    verbose(1, "    %g %g\n",  minLimit, maxLimit);

    /* Output original table plus new minLimit/maxLimit. */
    for (el = list; el != NULL; el = el->next)
        fprintf(f, "%s %s\n", el->name, (char *)el->val);
    fprintf(f, "minLimit %g\n", minLimit);
    fprintf(f, "maxLimit %g\n", maxLimit);
    fprintf(f, "\n");

    sqlDisconnect(&conn);
    slFreeList(&fieldList);
    slPairFreeValsAndList(&list);
    }
lineFileClose(&lf);
carefulClose(&f);
}