void relationRecon(char *fileName)
/* Do relationship based reconstruction.
 * The row count of table lineSize is used as the number of lines.  For each
 * line number the matching lineWords rows are fetched and the word indexed
 * by the second column of each row is written to fileName via fileOutput().
 * Elapsed time for loading the words and for the reconstruction is printed
 * to stdout. */
{
char **words;
long start, end;
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char **row;
FILE *f = mustOpen(fileName, "w");
int lineCount;
int i;
char query[256];

start = clock1000();
words = loadWords();
end = clock1000();
printf("Time to load words: %4.3f\n", 0.001*(end-start));

start = clock1000();
lineCount = sqlTableSize(conn, "lineSize");
for (i=0; i<lineCount; ++i)
    {
    /* Bounded formatting so the query can never overrun the buffer. */
    snprintf(query, sizeof(query), "select * from lineWords where line = %d", i);
    sr = sqlQuery(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        fileOutput(f,words[sqlUnsigned(row[1])]);
    sqlFreeResult(&sr);
    }
end = clock1000();
printf("Time to relation reconstruct file: %4.3f\n", 0.001*(end-start));
sqlDisconnect(&conn);

/* Close the output file so buffered data is flushed and the handle is not
 * leaked.  NOTE(review): the standard streams are skipped on the assumption
 * that mustOpen() can hand one back for special file names - confirm. */
if (f != NULL && f != stdout && f != stdin)
    {
    if (fclose(f) != 0)
        perror(fileName);
    }
}
int tableSize(char *db, char *table)
/* Open a connection to db, ask for the row count of table, close the
 * connection again and return the count. */
{
int rowCount;
struct sqlConnection *dbConn = sqlConnect(db);

rowCount = sqlTableSize(dbConn, table);
sqlDisconnect(&dbConn);
return rowCount;
}
void addRowcount()
/* Get the size of each table: walk tableList and store the result of
 * sqlTableSize() in each entry's rowCount field.  One connection obtained
 * from hAllocConn(database) is shared by all lookups and handed back with
 * hFreeConn() once the walk is finished. */
{
struct table *table1;
struct sqlConnection *conn = hAllocConn(database);

for (table1 = tableList; table1 != NULL; table1 = table1->next)
    {
    table1->rowCount = sqlTableSize(conn, table1->name);
    }
/* Give the connection back; the original never released it. */
hFreeConn(&conn);
verbose(1, "done with rowCount lookup\n");
}
void describeFields(char *db, char *table, struct asObject *asObj, struct sqlConnection *conn) /* Print out an HTML table showing table fields and types, and optionally * offering histograms for the text/enum fields. */ { struct sqlResult *sr; char **row; #define TOO_BIG_FOR_HISTO 500000 boolean tooBig = (sqlTableSize(conn, table) > TOO_BIG_FOR_HISTO); char query[256]; struct slName *exampleList, *example; boolean showItemRgb = FALSE; showItemRgb=bedItemRgb(findTdbForTable(db, curTrack, table, ctLookupName)); // should we expect itemRgb instead of "reserved" sqlSafef(query, sizeof(query), "select * from %s limit 1", table); exampleList = storeRow(conn, query); sqlSafef(query, sizeof(query), "describe %s", table); sr = sqlGetResult(conn, query); hTableStart(); hPrintf("<TR><TH>field</TH>"); if (exampleList != NULL) hPrintf("<TH>example</TH>"); hPrintf("<TH>SQL type</TH> "); if (!tooBig) hPrintf("<TH>info</TH> "); if (asObj != NULL) hPrintf("<TH>description</TH> "); puts("</TR>\n"); example = exampleList; while ((row = sqlNextRow(sr)) != NULL) { if (showItemRgb && (sameWord(row[0],"reserved"))) hPrintf("<TR><TD><TT>itemRgb</TT></TD> "); else hPrintf("<TR><TD><TT>%s</TT></TD> ", row[0]); if (exampleList != NULL) { hPrintf("<TD>"); if (example != NULL) hPrintf("%s", cleanExample(example->name)); else hPrintf("n/a"); hPrintf("</TD>"); } // enums/sets with many items can make for painfully wide rows in the table -- // add spaces between quoted list values: if (stringIn("','", row[1])) { struct dyString *spaced = dyStringSub(row[1], "','", "', '"); hPrintf("<TD><TT>%s</TT></TD>", spaced->string); } else hPrintf("<TD><TT>%s</TT></TD>", row[1]); if (!tooBig) { hPrintf(" <TD>"); if ((isSqlStringType(row[1]) && !sameString(row[1], "longblob")) || isSqlEnumType(row[1]) || isSqlSetType(row[1])) { hPrintf("<A HREF=\"%s", getScriptName()); hPrintf("?%s", cartSidUrlString(cart)); hPrintf("&%s=%s", hgtaDatabase, db); hPrintf("&%s=%s", hgtaHistoTable, table); hPrintf("&%s=%s", 
hgtaDoValueHistogram, row[0]); hPrintf("\">"); hPrintf("values"); hPrintf("</A>"); } else if (isSqlNumType(row[1])) { hPrintf("<A HREF=\"%s", getScriptName()); hPrintf("?%s", cartSidUrlString(cart)); hPrintf("&%s=%s", hgtaDatabase, db); hPrintf("&%s=%s", hgtaHistoTable, table); hPrintf("&%s=%s", hgtaDoValueRange, row[0]); hPrintf("\">"); hPrintf("range"); hPrintf("</A>"); } else { hPrintf(" "); } hPrintf("</TD>"); } if (asObj != NULL) { struct asColumn *asCol = asColumnFind(asObj, row[0]); hPrintf(" <TD>"); if (asCol != NULL) hPrintf("%s", asCol->comment); else { if (sameString("bin", row[0])) hPrintf("Indexing field to speed chromosome range queries."); else hPrintf(" "); } hPrintf("</TD>"); } puts("</TR>"); if (example != NULL) example = example->next; } hTableEnd(); sqlFreeResult(&sr); }
boolean cleanTable(char *table) /* clean a specific table */ { struct sqlResult *sr; char **row; char query[256]; int *ids; int totalRows = 0; boolean squealed = FALSE; time_t cleanStart = time(NULL); verbose(1, "-------------------\n"); verbose(1, "Cleaning table %s\n", table); verbose(1, "%s\n", ctime(&cleanStart)); totalRows = sqlTableSize(conn, table); verbose(1,"totalRows=%d\n", totalRows); if (totalRows==0) { verbose(1,"table %s is empty!", table); return FALSE; } AllocArray(ids, totalRows); // This is a super-fast query because it only needs to read the index which is cached in memory. sqlSafef(query,sizeof(query), "select id from %s" , table); sr = sqlGetResult(conn, query); int i = 0; while ((row = sqlNextRow(sr)) != NULL) { ids[i++] = sqlUnsigned(row[0]); if (i >= totalRows) break; } sqlFreeResult(&sr); totalRows = i; // in case they differed. int purgeRangeStart = -1; int purgeRangeEnd = -1; if (optionExists("purgeStart")) // manual purge range specified { purgeStart = optionInt("purgeStart", -1); purgeEnd = optionInt("purgeEnd", -1); if (purgeStart < 1 || purgeStart > 720) errAbort("Invalid purgeStart"); if (purgeEnd < 0) purgeEnd = 0; if (purgeStart < purgeEnd) errAbort("purgeStart should be greater than purgeEnd (in days ago)"); purgeRangeStart = binaryIdSearch(ids, totalRows, table, purgeStart); purgeRangeEnd = binaryIdSearch(ids, totalRows, table, purgeEnd); verbose(1, "manual purge range: purgeStart %d purgeEnd %d rangeStart %d rangeEnd %d rangeSize=%d ids[rs]=%d\n", purgeStart, purgeEnd, purgeRangeStart, purgeRangeEnd, purgeRangeEnd-purgeRangeStart, ids[purgeRangeStart]); if (!optionExists("dryRun")) cleanTableSection(table, ids[purgeRangeStart], ids[purgeRangeEnd]); } else // figure out purge-ranges automatically { int firstUseAge = 0; if (sameString(table, sessionDbTableName)) firstUseAge = 14; if (sameString(table, userDbTableName)) firstUseAge = 365; int day = sqlQuickNum(conn, NOSQLINJ "select dayofweek(now())"); // These old records take a 
long time to go through, 5k sessionDb to 55k userDb old recs to look at, // and typically produce only a few hundred deletions. // they are growing slowly and expire rarely, so we don't need to scan them // frequently and aggressively. So ONLY scan them once per week by doing 1/7 per day. // Also don't need to worry much about the // borders of the split-over-7-days divisions shifting much because the set is so nearly static. YAWN. int firstUseIndex = binaryIdSearch(ids, totalRows, table, firstUseAge); int oldRangeSize = (firstUseIndex - 0) / 7; int oldRangeStart = oldRangeSize * (day-1); int oldRangeEnd = oldRangeStart + oldRangeSize; verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d day %d: rangeStart %d rangeEnd %d rangeSize=%d ids[oldRangeStart]=%d\n", firstUseAge, firstUseIndex, day, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[oldRangeStart]); //int oldRangeStart = 0; //int oldRangeEnd = firstUseIndex; //verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d rangeStart %d rangeEnd %d rangeSize=%d ids[firstUseIndex]=%d\n", //firstUseAge, firstUseIndex, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[firstUseIndex]); // newly old can be expected to have some delete action // these records have newly crossed the threshold into being old enough to have possibly expired. 
int newOldRangeStart = firstUseIndex; int newOldRangeEnd = binaryIdSearch(ids, totalRows, table, firstUseAge - 1); verbose(1, "newOld cleaner: firstUseAge=%d rangeStart %d rangeEnd %d rangeSize=%d ids[newOldRangeStart]=%d\n", firstUseAge, newOldRangeStart, newOldRangeEnd, newOldRangeEnd-newOldRangeStart, ids[newOldRangeStart]); // this is the main delete action of cleaning out new robots (20k to 50k or more) int robo1RangeStart = binaryIdSearch(ids, totalRows, table, 2); int robo1RangeEnd = binaryIdSearch(ids, totalRows, table, 1); verbose(1, "robot cleaner1: twoDayIndex = %d oneDayIndex %d rangeSize=%d ids[rs]=%d\n", robo1RangeStart, robo1RangeEnd, robo1RangeEnd-robo1RangeStart, ids[robo1RangeStart]); int robo2RangeStart = -1; int robo2RangeEnd = -1; if (sameString(table, userDbTableName)) { // secondary robot cleaning only for userDb., produces a somewhat lesser, perhaps 3 to 5k deletions robo2RangeStart = binaryIdSearch(ids, totalRows, table, 7); robo2RangeEnd = binaryIdSearch(ids, totalRows, table, 6); verbose(1, "robot cleaner2: sevenDayIndex = %d sixDayIndex %d rangeSize=%d ids[rs]=%d\n", robo2RangeStart, robo2RangeEnd, robo2RangeEnd-robo2RangeStart, ids[robo2RangeStart]); } /* cannot clean until we have all the ranges determined since deleting messes up binSearch */ if (!optionExists("dryRun")) { verbose(1, "old cleaner:\n"); cleanTableSection(table, ids[oldRangeStart], ids[oldRangeEnd]); } if (!optionExists("dryRun")) { verbose(1, "newOld cleaner:\n"); cleanTableSection(table, ids[newOldRangeStart], ids[newOldRangeEnd]); } if (!optionExists("dryRun")) { verbose(1, "robot cleaner1:\n"); cleanTableSection(table, ids[robo1RangeStart], ids[robo1RangeEnd]); } if (sameString(table, userDbTableName)) { if (!optionExists("dryRun")) { verbose(1, "robot cleaner2:\n"); cleanTableSection(table, ids[robo2RangeStart], ids[robo2RangeEnd]); } } } /* int found = binaryIdSearch(ids, totalRows, table, 1); if ((found >= 0) && (found < totalRows)) verbose(1, "1 days ago found = 
%d, id == ids[found] = %d \n", found, ids[found]); found = binaryIdSearch(ids, totalRows, table, 2); if ((found >= 0) && (found < totalRows)) verbose(1, "2 days ago found = %d, id == ids[found] = %d \n", found, ids[found]); found = binaryIdSearch(ids, totalRows, table, 30); if ((found >= 0) && (found < totalRows)) verbose(1, "30 days ago found = %d, id == ids[found] = %d \n", found, ids[found]); */ /* if (daysAgoFirstUse < 14) { hitEnd = TRUE; break; } */ /* if (daysAgoFirstUse < 365) { hitEnd = TRUE; break; } */ // may need to pass back this data from the cleanTableSection call TODO //verbose(1, "%s: #rows count=%d delCount=%d\n\n", table, count, delCount); time_t cleanEnd = time(NULL); int minutes = difftime(cleanEnd, cleanStart) / 60; verbose(1, "%s\n", ctime(&cleanEnd)); verbose(1, "%d minutes total\n\n", minutes); squealed = checkMaxTableSizeExceeded(table); return squealed; }
void findToFixBedGraphLimits(char *input, char *output) /* findToFixBedGraphLimits - Scan through ra file of bedGraphs and calculate limits.. */ { struct lineFile *lf = lineFileOpen(input, TRUE); FILE *f = mustOpen(output, "w"); struct slPair *el, *list; while ((list = raNextRecordAsSlPairList(lf)) != NULL) { /* Find required fields for calcs. */ char *db = mustFindVal(list, "db", lf); char *track = mustFindVal(list, "track", lf); char *type = cloneString(mustFindVal(list, "type", lf)); /* Parse out type value, which should be "bedGraph 4" and put the 4 or whatever other number * in dataFieldIndex. */ char *typeWords[3]; int typeWordCount = chopLine(type, typeWords); if (typeWordCount != 2 || !sameString(typeWords[0], "bedGraph")) errAbort("Not well formed bedGraph type line %d of %s", lf->lineIx, lf->fileName); int dataFieldIndex = sqlUnsigned(typeWords[1]); /* Figure out field corresponding to dataFieldIndex. */ struct sqlConnection *conn = sqlConnect(db); struct slName *fieldList = sqlFieldNames(conn, track); struct slName *pastBin = fieldList; if (sameString(pastBin->name, "bin")) pastBin = pastBin->next; struct slName *fieldName = slElementFromIx(pastBin, dataFieldIndex - 1); if (fieldName == NULL) errAbort("%s doesn't have enough fields", track); char *field = fieldName->name; assert(sqlFieldIndex(conn, track, field) >= 0); /* Print reassuring status message */ verbose(1, "%s.%s has %d elements. Data field is %s\n", db, track, sqlTableSize(conn, track), field); /* Get min/max dataValues in fields. Do it ourselves rather than using SQL min/max because sometimes * the data field is a name column.... 
*/ char query[512]; safef(query, sizeof(query), "select %s from %s", field, track); struct sqlResult *sr = sqlGetResult(conn, query); char **row; row = sqlNextRow(sr); assert(row != NULL); double val = sqlDouble(row[0]); double minLimit = val, maxLimit = val; while ((row = sqlNextRow(sr)) != 0) { double val = sqlDouble(row[0]); if (val < minLimit) minLimit = val; if (val > maxLimit) maxLimit = val; } sqlFreeResult(&sr); verbose(1, " %g %g\n", minLimit, maxLimit); /* Output original table plus new minLimit/maxLimit. */ for (el = list; el != NULL; el = el->next) fprintf(f, "%s %s\n", el->name, (char *)el->val); fprintf(f, "minLimit %g\n", minLimit); fprintf(f, "maxLimit %g\n", maxLimit); fprintf(f, "\n"); sqlDisconnect(&conn); slFreeList(&fieldList); slPairFreeValsAndList(&list); } lineFileClose(&lf); carefulClose(&f); }