long long currentVmPeak() /* return value of peak Vm memory usage (if /proc/ business exists) */ { long long vmPeak = 0; pid_t pid = getpid(); char temp[256]; safef(temp, sizeof(temp), "/proc/%d/status", (int) pid); struct lineFile *lf = lineFileMayOpen(temp, TRUE); if (lf) { char *line; while (lineFileNextReal(lf, &line)) { // typical line: 'VmPeak: 62646196 kB' // seems to always be kB if (stringIn("VmPeak", line)) { char *words[3]; chopByWhite(line, words, 3); vmPeak = sqlLongLong(words[1]); // assume always 2nd word break; } } lineFileClose(&lf); } return vmPeak; }
static boolean singleFilter(struct annoFilter *filter, char **row, int rowSize) /* Apply one filter, using either filterFunc or type-based filter on column value. * Return TRUE if isExclude and filter passes, or if !isExclude and filter fails. */ { boolean fail = FALSE; if (filter->filterFunc != NULL) fail = filter->filterFunc(filter, row, rowSize); else if (filter->op == afMatch) fail = !wildMatch((char *)(filter->values), row[filter->columnIx]); else if (filter->op == afNotMatch) fail = wildMatch((char *)(filter->values), row[filter->columnIx]); else { // column is a number -- integer or floating point? enum asTypes type = filter->type; if (asTypesIsFloating(type)) fail = annoFilterDouble(filter, sqlDouble(row[filter->columnIx])); else if (asTypesIsInt(type)) fail = annoFilterLongLong(filter, sqlLongLong(row[filter->columnIx])); else errAbort("annoFilterRowFails: unexpected enum asTypes %d for numeric filter op %d", type, filter->op); } if ((filter->isExclude && !fail) || (!filter->isExclude && fail)) return TRUE; return FALSE; }
void sqlLongLongStaticArray(char *s, long long **retArray, int *retSize)
/* Convert comma separated list of numbers to an array which will be
 * overwritten next call to this function, but need not be freed. */
{
/* Buffer persists across calls (hence "static" in the name); grown
 * geometrically and never shrunk.  Not thread-safe. */
static long long *array = NULL;
static unsigned alloc = 0;
unsigned count = 0;
for (;;)
    {
    char *e;
    if (s == NULL || s[0] == 0)
	break;
    e = strchr(s, ',');
    if (e != NULL)
	*e++ = 0;	/* terminate current number in place, advance past comma */
    if (count >= alloc)
	{
	/* Double capacity, starting at 64 elements. */
	if (alloc == 0)
	    alloc = 64;
	else
	    alloc <<= 1;
	ExpandArray(array, count, alloc);
	}
    array[count++] = sqlLongLong(s);
    s = e;	/* NULL after the last element, which ends the loop next pass */
    }
*retSize = count;
*retArray = array;
}
void recordIntoHistory(struct sqlConnection *conn, unsigned id, char *table, boolean success)
/* Record success/failure into uploadAttempts and historyBits fields of table. */
{
/* Get historyBits and fold status into it. */
char quickResult[32];
char query[256];
sqlSafef(query, sizeof(query), "select historyBits from %s where id=%u", table, id);
if (sqlQuickQuery(conn, query, quickResult, sizeof(quickResult)) == NULL)
    internalErr();	/* row must exist -- caller supplied a bad id otherwise */
char *lastTimeField;
char *openResultField;
/* Shift history left one bit; the low bit records this attempt's outcome. */
long long historyBits = sqlLongLong(quickResult);
historyBits <<= 1;
if (success)
    {
    historyBits |= 1;
    lastTimeField = "lastOkTime";
    openResultField = "openSuccesses";
    }
else
    {
    lastTimeField = "lastNotOkTime";
    openResultField = "openFails";
    }
/* Write back updated history, bump the success/fail counter, and stamp the time. */
sqlSafef(query, sizeof(query),
    "update %s set historyBits=%lld, %s=%s+1, %s=%lld "
    "where id=%lld"
    , table, historyBits, openResultField, openResultField, lastTimeField, edwNow(),
    (long long)id);
sqlUpdate(conn, query);
}
boolean checkMaxTableSizeExceeded(char *table)
/* check if max table size has been exceeded, send email warning if so */
{
/* NOTE(review): 'conn' and 'squealSize' are presumably file-scope globals set
 * elsewhere -- confirm against the rest of the file. */
boolean squealed = FALSE;
long long dataLength = 0;
long long dataFree = 0;
struct sqlResult *sr;
char **row;
char query[256];
/* MySQL "show table status" gives per-table storage statistics. */
sqlSafef(query, sizeof(query), "show table status like '%s'", table );
sr = sqlGetResult(conn, query);
row = sqlNextRow(sr);
if (!row)
    errAbort("error fetching table status");
int dlField = sqlFieldColumn(sr, "Data_length");
if (dlField == -1)
    errAbort("error finding field 'Data_length' in show table status resultset");
dataLength = sqlLongLong(row[dlField]);
int dfField = sqlFieldColumn(sr, "Data_free");
if (dfField == -1)
    errAbort("error finding field 'Data_free' in show table status resultset");
dataFree = sqlLongLong(row[dfField]);
verbose(1, "%s: Data_length=%lld Data_free=%lld\n\n", table, dataLength, dataFree);
/* Compare size in whole GB against the squeal threshold. */
if ((dataLength / (1024 * 1024 * 1024)) >= squealSize)
    {
    char msg[256];
    char cmdLine[256];
    char *emailList = "[email protected] [email protected] [email protected]";
    safef(msg, sizeof(msg), "BIG HGCENTRAL TABLE %s data_length: %lld data_free: %lld\n"
	, table, dataLength, dataFree);
    printf("%s", msg);
    /* Shell out to mail; msg is built entirely from internal values above. */
    safef(cmdLine, sizeof(cmdLine),
	"echo '%s'|mail -s 'WARNING hgcentral cleanup detected data_length max size %d GB exceeded' %s"
	, msg
	, squealSize
	, emailList
	);
    system(cmdLine);
    squealed = TRUE;
    }
sqlFreeResult(&sr);
return squealed;
}
void tableStatusStaticLoad(char **row, struct tableStatus *ret)
/* Load a row from tableStatus table into ret.  The contents of ret will
 * be replaced at the next call to this function. */
{
/* String fields alias the caller's row directly (no cloning) -- ret is only
 * valid as long as the underlying result row is. */
ret->name = row[0];
ret->type = row[1];
ret->rowFormat = row[2];
ret->rows = sqlUnsigned(row[3]);
ret->aveRowLength = sqlUnsigned(row[4]);
ret->dataLength = sqlLongLong(row[5]);
ret->maxDataLength = sqlLongLong(row[6]);
ret->indexLength = sqlLongLong(row[7]);
ret->dataFree = sqlLongLong(row[8]);
ret->autoIncrement = row[9];
ret->createTime = row[10];
ret->updateTime = row[11];
ret->checkTime = row[12];
ret->createOptions = row[13];
ret->comment = row[14];
}
void scoredRefStaticLoad(char **row, struct scoredRef *ret)
/* Load a row from scoredRef table into ret.  The contents of ret will
 * be replaced at the next call to this function. */
{
/* chrom aliases the caller's row (not cloned); numeric fields are parsed. */
ret->chrom = row[0];
ret->chromStart = sqlUnsigned(row[1]);
ret->chromEnd = sqlUnsigned(row[2]);
ret->extFile = sqlUnsigned(row[3]);
ret->offset = sqlLongLong(row[4]);
ret->score = atof(row[5]);
}
long long edwGotFile(struct sqlConnection *conn, char *submitDir, char *submitFileName, char *md5, long long size) /* See if we already got file. Return fileId if we do, otherwise -1. This returns * TRUE based mostly on the MD5sum. For short files (less than 100k) then we also require * the submitDir and submitFileName to match. This is to cover the case where you might * have legitimate empty files duplicated even though they were computed based on different * things. For instance coming up with no peaks is a legitimate result for many chip-seq * experiments. */ { /* For large files just rely on MD5. */ char query[PATH_LEN+512]; if (size > 100000) { sqlSafef(query, sizeof(query), "select id from edwFile where md5='%s' order by submitId desc limit 1" , md5); long long result = sqlQuickLongLong(conn, query); if (result == 0) result = -1; return result; } /* Rest of the routine deals with smaller files, which we are less worried about * duplicating, and indeed expect a little duplication of the empty file if none * other. */ /* First see if we have even got the directory. */ sqlSafef(query, sizeof(query), "select id from edwSubmitDir where url='%s'", submitDir); int submitDirId = sqlQuickNum(conn, query); if (submitDirId <= 0) return -1; /* The complex truth is that we may have gotten this file multiple times. * We return the most recent version where it got uploaded and passed the post-upload * MD5 sum, and thus where the MD5 field is filled in the database. */ sqlSafef(query, sizeof(query), "select md5,id from edwFile " "where submitFileName='%s' and submitDirId = %d and md5 != '' " "order by submitId desc limit 1" , submitFileName, submitDirId); struct sqlResult *sr = sqlGetResult(conn, query); char **row; long fileId = -1; if ((row = sqlNextRow(sr)) != NULL) { char *dbMd5 = row[0]; if (sameWord(md5, dbMd5)) fileId = sqlLongLong(row[1]); } sqlFreeResult(&sr); return fileId; }
long long sqlLongLongComma(char **pS)
/* Return offset (often 64 bits) at *pS.  Advance *pS past comma at
 * end */
{
char *start = *pS;
char *comma = strchr(start, ',');
*comma++ = 0;		/* terminate number in place, step past the comma */
*pS = comma;
return sqlLongLong(start);
}
static void setStepSize(char *spec) /* given *spec: "step=<position>", parse and set stepSize and dataSpan */ { char *wordPairs[2]; int wc; char *clone; clone = cloneString(spec); wc = chopByChar(spec, '=', wordPairs, 2); if (wc != 2) errAbort("Expecting step=<size>, no <size> found in %s at line %lu", clone, lineCount); stepSize=sqlLongLong(wordPairs[1]); freeMem(clone); }
struct tableStatus *tableStatusLoad(char **row)
/* Load a tableStatus from row fetched with select * from tableStatus
 * from database.  Dispose of this with tableStatusFree(). */
{
struct tableStatus *ret;

AllocVar(ret);
/* Unlike tableStatusStaticLoad, string fields are cloned so the result
 * outlives the SQL result row. */
ret->name = cloneString(row[0]);
ret->type = cloneString(row[1]);
ret->rowFormat = cloneString(row[2]);
ret->rows = sqlUnsigned(row[3]);
ret->aveRowLength = sqlUnsigned(row[4]);
ret->dataLength = sqlLongLong(row[5]);
ret->maxDataLength = sqlLongLong(row[6]);
ret->indexLength = sqlLongLong(row[7]);
ret->dataFree = sqlLongLong(row[8]);
ret->autoIncrement = cloneString(row[9]);
ret->createTime = cloneString(row[10]);
ret->updateTime = cloneString(row[11]);
ret->checkTime = cloneString(row[12]);
ret->createOptions = cloneString(row[13]);
ret->comment = cloneString(row[14]);
return ret;
}
struct scoredRef *scoredRefLoad(char **row)
/* Load a scoredRef from row fetched with select * from scoredRef
 * from database.  Dispose of this with scoredRefFree(). */
{
struct scoredRef *ret;

AllocVar(ret);
/* chrom is cloned so the result outlives the SQL result row. */
ret->chrom = cloneString(row[0]);
ret->chromStart = sqlUnsigned(row[1]);
ret->chromEnd = sqlUnsigned(row[2]);
ret->extFile = sqlUnsigned(row[3]);
ret->offset = sqlLongLong(row[4]);
ret->score = atof(row[5]);
return ret;
}
static void setDataSpan(char *spec) /* given *spec: "dataSpan=N", set parse and dataSpan to N */ { char *wordPairs[2]; int wc; char *clone; clone = cloneString(spec); wc = chopByChar(spec, '=', wordPairs, 2); if (wc != 2) errAbort("Expecting span=N, no N found in %s at line %lu", clone, lineCount); dataSpan = sqlLongLong(wordPairs[1]); freeMem(clone); }
static void brokenRefPepGetSeqScan(struct sqlConnection *conn, struct extFileTbl* extFileTbl,
                                   struct brokenRefPepTbl *brpTbl)
/* load refSeq peps that have seq or extFile problems, including
 * checking fasta file contents*/
{
/* NP_/YP_ accessions are RefSeq protein ids; '_' in LIKE matches any single
 * char, so the pattern also relies on the literal prefix letters. */
static char *query = NOSQLINJ "select id, acc, version, size, gbExtFile, file_offset, file_size "
    "from gbSeq where (acc like \"NP__%\") or (acc like \"YP__%\")";
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
/* Check each matching sequence row against its external fasta file. */
while ((row = sqlNextRow(sr)) != NULL)
    brokenRefPepSeqCheck(conn, extFileTbl, brpTbl,
                         sqlSigned(row[0]), row[1], sqlSigned(row[2]),
                         sqlUnsigned(row[3]), sqlUnsigned(row[4]),
                         sqlLongLong(row[5]), sqlUnsigned(row[6]));
sqlFreeResult(&sr);
}
int sqlLongLongArray(char *s, long long *array, int arraySize)
/* Convert comma separated list of numbers to an array.  Pass in
 * array and max size of array.  Returns number of elements parsed. */
{
/* Was 'unsigned count' compared with '== arraySize' (an int): a mixed
 * signed/unsigned comparison, and a negative arraySize converted to a huge
 * unsigned so the guard never fired and the loop overran the caller's array.
 * Signed count with >= stops correctly for zero or negative sizes too. */
int count = 0;

for (;;)
    {
    char *e;
    if (s == NULL || s[0] == 0 || count >= arraySize)
	break;
    e = strchr(s, ',');
    if (e != NULL)
	*e++ = 0;	/* terminate current number in place, skip the comma */
    array[count++] = sqlLongLong(s);
    s = e;		/* NULL after last element, ends loop next pass */
    }
return count;
}
static void setFixedStart(char *spec) /* given *spec: "start=<position>", parse and set fixedStart */ { char *wordPairs[2]; int wc; char *clone; clone = cloneString(spec); wc = chopByChar(spec, '=', wordPairs, 2); if (wc != 2) errAbort("Expecting start=<position>, no <position> found in %s at line %lu", clone, lineCount); fixedStart=sqlLongLong(wordPairs[1]); if (fixedStart == 0) errAbort("Found start=0 at line %lu, the first chrom position is 1, not 0", lineCount); else fixedStart = BASE_0(fixedStart); /* zero relative half-open */ freeMem(clone); }
void cdwReallyRemoveFiles(char *email, char *submitUrl, int fileCount, char *fileIds[])
/* cdwReallyRemoveFiles - Remove files from data warehouse.  Generally you want to depricate them
 * instead. */
{
/* First convert all fileIds to binary.  Do this first so bad command lines get caught. */
long long ids[fileCount];
int i;
for (i = 0; i<fileCount; ++i)
    ids[i] = sqlLongLong(fileIds[i]);

/* Get hash of all submissions by user from that URL.  Hash is keyed by ascii version of
 * submitId. */
struct sqlConnection *conn = cdwConnectReadWrite();
struct cdwUser *user = cdwMustGetUserFromEmail(conn, email);
char query[256];
sqlSafef(query, sizeof(query),
    " select cdwSubmit.id,cdwSubmitDir.id from cdwSubmit,cdwSubmitDir "
    " where cdwSubmit.submitDirId=cdwSubmitDir.id and userId=%d "
    " and cdwSubmitDir.url='%s' ",
    user->id, submitUrl);
struct hash *submitHash = sqlQuickHash(conn, query);

/* Make sure that files and submission really go together. */
for (i=0; i<fileCount; ++i)
    {
    long long fileId = ids[i];
    char buf[64];
    sqlSafef(query, sizeof(query), "select submitId from cdwFile where id=%lld", fileId);
    char *result = sqlQuickQuery(conn, query, buf, sizeof(buf));
    if (result == NULL)
        errAbort("%lld is not a fileId in the warehouse", fileId);
    if (hashLookup(submitHash, result) == NULL)
        errAbort("File ID %lld does not belong to submission set based on %s", fileId, submitUrl);
    }

/* OK - paranoid checking is done, now let's remove each file from the tables it is in. */
/* NOTE(review): 'really' is presumably a file-scope flag set from the command
 * line -- confirm against the rest of the file. */
for (i=0; i<fileCount; ++i)
    {
    cdwReallyRemoveFile(conn, ids[i], really);
    }
}
static void raInfoAdd(struct raInfoTbl *rit, struct hash *raRec, char *acc, short ver, char *seqSzFld, char *offFld, char *recSzFld, unsigned extFileId) /* add a ra mrna or pep */ { struct hashEl *hel; struct raInfo *ri; char accVer[GB_ACC_BUFSZ]; if (extFileId == 0) errAbort("no extFileId for %s.%d", acc, ver); gbVerbPr(10, "raAdd %s.%d ext %d", acc, ver, extFileId); lmAllocVar(rit->accMap->lm, ri); safef(accVer, sizeof(accVer), "%s.%d", acc, ver); hel = hashAdd(rit->accMap, accVer, ri); ri->acc = lmCloneString(rit->accMap->lm, acc); ri->version = ver; ri->size = sqlUnsigned((char*)hashMustFindVal(raRec, seqSzFld)); ri->offset = sqlLongLong((char*)hashMustFindVal(raRec, offFld)); ri->fileSize = sqlUnsigned((char*)hashMustFindVal(raRec, recSzFld)); ri->extFileId = extFileId; }
void edwChangeFormat(char *format, int idCount, char *idStrings[])
/* edwChangeFormat - Change format and force a revalidation for a file.. */
{
struct sqlConnection *conn = edwConnectReadWrite();

/* Convert ascii id's to valid file ids so we catch errors early. */
long long ids[idCount];
struct edwValidFile *vfs[idCount];
int i;
for (i=0; i<idCount; ++i)
    {
    long long id = ids[i] = sqlLongLong(idStrings[i]);
    struct edwValidFile *vf = vfs[i] = edwValidFileFromFileId(conn, id);
    if (vf == NULL)
        errAbort("%lld is not a fileId in the edwValidFile table", id);
    }

/* Loop through each file and change format. */
for (i=0; i<idCount; ++i)
    {
    changeFormat(conn, vfs[i], format);
    }
sqlDisconnect(&conn);
}
void edwParseSubmitFile(struct sqlConnection *conn, char *submitLocalPath, char *submitUrl,
    struct submitFileRow **retSubmitList)
/* Load and parse up this file as fielded table, make sure all required fields are there,
 * and calculate indexes of required fields.   This produces an edwFile list, but with
 * still quite a few fields missing - just what can be filled in from submit filled in.
 * The submitUrl is just used for error reporting.  If it's local, just make it the
 * same as submitLocalPath. */
{
char *requiredFields[] = {"file_name", "format", "output_type", "experiment", "replicate",
    "enriched_in", "md5_sum", "size", "modified", "valid_key"};
struct fieldedTable *table = fieldedTableFromTabFile(submitLocalPath, submitUrl,
    requiredFields, ArraySize(requiredFields));

/* Get offsets of all required fields */
int fileIx = stringArrayIx("file_name", table->fields, table->fieldCount);
int formatIx = stringArrayIx("format", table->fields, table->fieldCount);
int outputIx = stringArrayIx("output_type", table->fields, table->fieldCount);
int experimentIx = stringArrayIx("experiment", table->fields, table->fieldCount);
int replicateIx = stringArrayIx("replicate", table->fields, table->fieldCount);
int enrichedIx = stringArrayIx("enriched_in", table->fields, table->fieldCount);
int md5Ix = stringArrayIx("md5_sum", table->fields, table->fieldCount);
int sizeIx = stringArrayIx("size", table->fields, table->fieldCount);
int modifiedIx = stringArrayIx("modified", table->fields, table->fieldCount);
int validIx = stringArrayIx("valid_key", table->fields, table->fieldCount);

/* See if we're doing replacement and check have all columns needed if so. */
int replacesIx = stringArrayIx(replacesTag, table->fields, table->fieldCount);
int replaceReasonIx = stringArrayIx(replaceReasonTag, table->fields, table->fieldCount);
boolean doReplace = (replacesIx != -1);
if (doReplace)
    if (replaceReasonIx == -1)
        errAbort("Error: got \"%s\" column without \"%s\" column in %s.",
	    replacesTag, replaceReasonTag, submitUrl);

/* Loop through and make sure all field values are ok */
struct fieldedRow *fr;
for (fr = table->rowList; fr != NULL; fr = fr->next)
    {
    char **row = fr->row;
    char *fileName = row[fileIx];
    allGoodFileNameChars(fileName);
    char *format = row[formatIx];
    if (!isSupportedFormat(format))
	errAbort("Format %s is not supported", format);
    allGoodSymbolChars(row[outputIx]);
    char *experiment = row[experimentIx];
    if (!isExperimentId(experiment))
        errAbort("%s in experiment field does not seem to be an encode experiment", experiment);
    /* Replicate is either a number, or the special values "pooled" or "n/a". */
    char *replicate = row[replicateIx];
    if (differentString(replicate, "pooled") && differentString(replicate, "n/a") )
	if (!isAllNum(replicate))
	    errAbort("%s is not a good value for the replicate column", replicate);
    char *enriched = row[enrichedIx];
    if (!encode3CheckEnrichedIn(enriched))
        errAbort("Enriched_in %s is not supported", enriched);
    char *md5 = row[md5Ix];
    if (strlen(md5) != 32 || !isAllHexLower(md5))
        errAbort("md5 '%s' is not in all lower case 32 character hexadecimal format.", md5);
    char *size = row[sizeIx];
    if (!isAllNum(size))
        errAbort("Invalid size '%s'", size);
    char *modified = row[modifiedIx];
    if (!isAllNum(modified))
        errAbort("Invalid modification time '%s'", modified);
    /* valid_key must equal the key recomputed from md5 and size. */
    char *validIn = row[validIx];
    char *realValid = encode3CalcValidationKey(md5, sqlLongLong(size));
    if (!sameString(validIn, realValid))
        errAbort("The valid_key %s for %s doesn't fit", validIn, fileName);
    freez(&realValid);

    if (doReplace)
	{
	/* A non-empty replaces value must be an accession with our license
	 * plate prefix, and must come with a reason. */
	char *replaces = row[replacesIx];
	char *reason = row[replaceReasonIx];
	if (!isEmptyOrNa(replaces))
	    {
	    char *prefix = edwLicensePlateHead(conn);
	    if (!startsWith(prefix, replaces))
		errAbort("%s in replaces column is not an ENCODE file accession", replaces);
	    if (isEmptyOrNa(reason))
		errAbort("Replacing %s without a reason\n", replaces);
	    }
	}
    }
*retSubmitList = submitFileRowFromFieldedTable(conn, table, fileIx, md5Ix, sizeIx,
    modifiedIx, replacesIx, replaceReasonIx);
}
struct cdwBamFile *cdwBamFileFromNextRa(struct lineFile *lf, struct raToStructReader *reader)
/* Return next stanza put into an cdwBamFile. */
{
/* Enum values double as indexes into reader->fieldsObserved; order must
 * match the ids stored in reader->fieldIds. */
enum fields
    {
    isPairedField,
    isSortedByTargetField,
    readCountField,
    readBaseCountField,
    mappedCountField,
    uniqueMappedCountField,
    readSizeMeanField,
    readSizeStdField,
    readSizeMinField,
    readSizeMaxField,
    u4mReadCountField,
    u4mUniquePosField,
    u4mUniqueRatioField,
    targetBaseCountField,
    targetSeqCountField,
    };
if (!raSkipLeadingEmptyLines(lf, NULL))
    return NULL;	/* end of file -- no more stanzas */
struct cdwBamFile *el;
AllocVar(el);
/* Track which tags this stanza has set, to catch duplicates. */
bool *fieldsObserved = reader->fieldsObserved;
bzero(fieldsObserved, reader->fieldCount);
char *tag, *val;
while (raNextTagVal(lf, &tag, &val, NULL))
    {
    struct hashEl *hel = hashLookup(reader->fieldIds, tag);
    if (hel != NULL)	/* unknown tags are silently skipped */
        {
        int id = ptToInt(hel->val);
        if (fieldsObserved[id])
            errAbort("Duplicate tag %s line %d of %s\n", tag, lf->lineIx, lf->fileName);
        fieldsObserved[id] = TRUE;
        /* Parse value with the converter appropriate to each field's type. */
        switch (id)
            {
            case isPairedField: { el->isPaired = sqlSigned(val); break; }
            case isSortedByTargetField: { el->isSortedByTarget = sqlSigned(val); break; }
            case readCountField: { el->readCount = sqlLongLong(val); break; }
            case readBaseCountField: { el->readBaseCount = sqlLongLong(val); break; }
            case mappedCountField: { el->mappedCount = sqlLongLong(val); break; }
            case uniqueMappedCountField: { el->uniqueMappedCount = sqlLongLong(val); break; }
            case readSizeMeanField: { el->readSizeMean = sqlDouble(val); break; }
            case readSizeStdField: { el->readSizeStd = sqlDouble(val); break; }
            case readSizeMinField: { el->readSizeMin = sqlSigned(val); break; }
            case readSizeMaxField: { el->readSizeMax = sqlSigned(val); break; }
            case u4mReadCountField: { el->u4mReadCount = sqlSigned(val); break; }
            case u4mUniquePosField: { el->u4mUniquePos = sqlSigned(val); break; }
            case u4mUniqueRatioField: { el->u4mUniqueRatio = sqlDouble(val); break; }
            case targetBaseCountField: { el->targetBaseCount = sqlLongLong(val); break; }
            case targetSeqCountField: { el->targetSeqCount = sqlUnsigned(val); break; }
            default: internalErr(); break;
            }
        }
    }
/* Abort if any required tag was missing from the stanza. */
raToStructReaderCheckRequiredFields(reader, lf);
return el;
}
struct edwFastqFile *edwFastqFileFromNextRa(struct lineFile *lf, struct raToStructReader *reader)
/* Return next stanza put into an edwFastqFile. */
{
/* Enum values double as indexes into reader->fieldsObserved; order must
 * match the ids stored in reader->fieldIds. */
enum fields
    {
    readCountField,
    baseCountField,
    readSizeMeanField,
    readSizeStdField,
    readSizeMinField,
    readSizeMaxField,
    qualMeanField,
    qualStdField,
    qualMinField,
    qualMaxField,
    qualTypeField,
    qualZeroField,
    atRatioField,
    aRatioField,
    cRatioField,
    gRatioField,
    tRatioField,
    nRatioField,
    posCountField,
    qualPosField,
    aAtPosField,
    cAtPosField,
    gAtPosField,
    tAtPosField,
    nAtPosField,
    };
if (!raSkipLeadingEmptyLines(lf, NULL))
    return NULL;	/* end of file -- no more stanzas */
struct edwFastqFile *el;
AllocVar(el);
/* Track which tags this stanza has set, to catch duplicates. */
bool *fieldsObserved = reader->fieldsObserved;
bzero(fieldsObserved, reader->fieldCount);
char *tag, *val;
while (raNextTagVal(lf, &tag, &val, NULL))
    {
    struct hashEl *hel = hashLookup(reader->fieldIds, tag);
    if (hel != NULL)	/* unknown tags are silently skipped */
        {
        int id = ptToInt(hel->val);
        if (fieldsObserved[id])
            errAbort("Duplicate tag %s line %d of %s\n", tag, lf->lineIx, lf->fileName);
        fieldsObserved[id] = TRUE;
        /* Parse value with the converter appropriate to each field's type.
         * Per-position array fields also reconcile their length with
         * el->posCount via raToStructArraySignedSizer. */
        switch (id)
            {
            case readCountField: { el->readCount = sqlLongLong(val); break; }
            case baseCountField: { el->baseCount = sqlLongLong(val); break; }
            case readSizeMeanField: { el->readSizeMean = sqlDouble(val); break; }
            case readSizeStdField: { el->readSizeStd = sqlDouble(val); break; }
            case readSizeMinField: { el->readSizeMin = sqlSigned(val); break; }
            case readSizeMaxField: { el->readSizeMax = sqlSigned(val); break; }
            case qualMeanField: { el->qualMean = sqlDouble(val); break; }
            case qualStdField: { el->qualStd = sqlDouble(val); break; }
            case qualMinField: { el->qualMin = sqlDouble(val); break; }
            case qualMaxField: { el->qualMax = sqlDouble(val); break; }
            case qualTypeField: { el->qualType = cloneString(val); break; }
            case qualZeroField: { el->qualZero = sqlSigned(val); break; }
            case atRatioField: { el->atRatio = sqlDouble(val); break; }
            case aRatioField: { el->aRatio = sqlDouble(val); break; }
            case cRatioField: { el->cRatio = sqlDouble(val); break; }
            case gRatioField: { el->gRatio = sqlDouble(val); break; }
            case tRatioField: { el->tRatio = sqlDouble(val); break; }
            case nRatioField: { el->nRatio = sqlDouble(val); break; }
            case posCountField:
                {
                int arraySize = sqlSigned(val);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "posCount");
                break;
                }
            case qualPosField:
                {
                int arraySize;
                sqlDoubleDynamicArray(val, &el->qualPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "qualPos");
                break;
                }
            case aAtPosField:
                {
                int arraySize;
                sqlDoubleDynamicArray(val, &el->aAtPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "aAtPos");
                break;
                }
            case cAtPosField:
                {
                int arraySize;
                sqlDoubleDynamicArray(val, &el->cAtPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "cAtPos");
                break;
                }
            case gAtPosField:
                {
                int arraySize;
                sqlDoubleDynamicArray(val, &el->gAtPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "gAtPos");
                break;
                }
            case tAtPosField:
                {
                int arraySize;
                sqlDoubleDynamicArray(val, &el->tAtPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "tAtPos");
                break;
                }
            case nAtPosField:
                {
                int arraySize;
                sqlDoubleDynamicArray(val, &el->nAtPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "nAtPos");
                break;
                }
            default: internalErr(); break;
            }
        }
    }
/* Abort if any required tag was missing from the stanza. */
raToStructReaderCheckRequiredFields(reader, lf);
return el;
}
/* The single externally visible routine. * Future improvements will need to add a couple more arguments to * satisify the needs of the command line version and its options. * Currently, this is used only in customTrack input parsing. */ void wigAsciiToBinary( char *wigAscii, char *wigFile, char *wibFile, double *upperLimit, double *lowerLimit, struct wigEncodeOptions *options) /* given the three file names, read the ascii wigAscii file and produce * the wigFile and wibFile outputs */ { struct lineFile *lf; /* for line file utilities */ char *line = (char *) NULL; /* to receive data input line */ char *words[10]; /* to split data input line */ int wordCount = 0; /* result of split */ int validLines = 0; /* counting only lines with data */ double dataValue = 0.0; /* from data input */ boolean bedData = FALSE; /* in bed format data */ boolean variableStep = FALSE; /* in variableStep data */ boolean fixedStep = FALSE; /* in fixedStep data */ char *prevChromName = (char *)NULL; /* to watch for chrom name changes */ int trackCount = 0; /* We abort if we see more than one track. 
*/ if ((wigAscii == (char *)NULL) || (wigFile == (char *)NULL) || (wibFile == (char *)NULL)) errAbort("wigAsciiToBinary: missing data file names, ascii: %s, wig: %s, wib: %s", wigAscii, wigFile, wibFile); /* need to be careful here and initialize all the global variables */ freez(&wibFileName); /* send this name to the global */ wibFileName = cloneString(wibFile); /* variable for use in output_row() */ lineCount = 0; /* to count all lines */ add_offset = 0; /* to allow "lifting" of the data */ validLines = 0; /* to count only lines with data */ rowCount = 0; /* to count rows output */ bincount = 0; /* to count up to binsize */ binsize = 1024; /* # of data points per table row */ dataSpan = 1; /* default bases spanned per data point */ chromStart = 0; /* for table row data */ previousOffset = 0; /* for data missing detection */ fileOffset = 0; /* current location within binary data file */ fileOffsetBegin = 0;/* location in binary data file where this bin starts*/ freez(&data_values); freez(&validData); data_values = (double *) needMem( (size_t) (binsize * sizeof(double))); validData = (unsigned char *) needMem( (size_t) (binsize * sizeof(unsigned char))); if (options != NULL) { if (options->lift != 0) add_offset = options->lift; if (options->noOverlap) noOverlap = TRUE; if (options->flagOverlapSpanData) flagOverlapSpanData = TRUE; if (options->wibSizeLimit > 0) wibSizeLimit = options->wibSizeLimit; } /* limits for the complete set of data, they must change from these initial defaults during processing */ overallLowerLimit = wigEncodeStartingLowerLimit; overallUpperLimit = wigEncodeStartingUpperLimit; binout = mustOpen(wibFile,"w"); /* binary data file */ wigout = mustOpen(wigFile,"w"); /* table row definition file */ #if defined(DEBUG) /* dbg */ chmod(wibFile, 0666); chmod(wigFile, 0666); #endif lf = lineFileOpen(wigAscii, TRUE); /* input file */ while (lineFileNext(lf, &line, NULL)) { boolean readingFrameSlipped; ++lineCount; if ((wibSizeLimit > 0) && (wibSize >= 
wibSizeLimit)) errAbort("data size limit of %lld data values has been exceeded. This data can be efficiently displayed with the <A HREF='/goldenPath/help/bigWig.html' TARGET=_blank>bigWig file format</A> in a custom track, or in a <A HREF='/goldenPath/help/hgTrackHubHelp.html' TARGET=_blank>Track Hub</A> for multiple large datasets.", wibSizeLimit); line = skipLeadingSpaces(line); /* ignore blank or comment lines */ if ((line == (char *)NULL) || (line[0] == '\0') || (line[0] == '#')) continue; /* !!! go to next line of input */ wordCount = chopByWhite(line, words, ArraySize(words)); if (sameWord("track",words[0])) { /* Allow (and ignore) one track line, but no more. */ ++trackCount; if (trackCount > 1) errAbort("Multiple tracks seen, second at line %d of %s, can only handle one.", lf->lineIx, lf->fileName); continue; } else if (sameWord("browser", words[0])) { continue; /* ignore browser lines if present */ } else if (sameWord("variableStep",words[0])) { int i; boolean foundChrom = FALSE; /* safest thing to do if we were processing anything is to * output that previous block and start anew * Future improvement could get fancy here and decide if it * is really necessary to start over, although the concept * of a line between data points on one item may use this * block behavior later to define line segments, so don't * get too quick to be fancy here. This line behavior * implies that feature names will need to be specified to * identify the line segments that belong together. 
*/ if (variableStep || bedData || fixedStep) { output_row(); validLines = 0; /* to cause reset for first offset */ } dataSpan = 1; /* default bases spanned per data point */ for(i = 1; i < wordCount; ++i) { if (startsWith("chrom",words[i])) { setChromName(words[i]); foundChrom = TRUE; } else if (startsWith("span",words[i])) setDataSpan(words[i]); else errAbort("illegal specification on variableStep at line %lu: %s", lineCount, words[i]); } if (!foundChrom) errAbort("missing chrom=<name> specification on variableStep declaration at line %lu", lineCount); variableStep = TRUE; bedData = FALSE; fixedStep = FALSE; freez(&prevChromName); prevChromName = cloneString(chromName); continue; /* !!! go to next input line */ } else if (sameWord("fixedStep",words[0])) { boolean foundChrom = FALSE; boolean foundStart = FALSE; int i; /* same comment as above */ if (variableStep || bedData || fixedStep) { output_row(); validLines = 0; /* to cause reset for first offset */ } stepSize = 1; /* default step size */ dataSpan = 0; /* this will match step size if not set*/ for(i = 1; i < wordCount; ++i) { if (startsWith("chrom",words[i])) { setChromName(words[i]); foundChrom = TRUE; } else if (startsWith("start",words[i])) { setFixedStart(words[i]); foundStart = TRUE; } else if (startsWith("step",words[i])) setStepSize(words[i]); else if (startsWith("span",words[i])) setDataSpan(words[i]); else errAbort("illegal specification on variableStep at line %lu: %s", lineCount, words[i]); } if (dataSpan == 0) dataSpan = stepSize; if (!foundChrom) errAbort("missing chrom=<name> specification on fixedStep declaration at line %lu", lineCount); if (!foundStart) errAbort("missing start=<position> specification on fixedStep declaration at line %lu", lineCount); if (noOverlap && validLines && prevChromName) { if (sameWord(prevChromName,chromName) && (fixedStart < chromStart)) errAbort("specified fixedStep chromStart %llu is less than expected next chromStart %llu", fixedStart, chromStart); } 
variableStep = FALSE; bedData = FALSE; fixedStep = TRUE; freez(&prevChromName); prevChromName = cloneString(chromName); continue; /* !!! go to next input line */ } else if (wordCount == 4) { /* while in bedData, we do not necessarily need to start a new * batch unless the chrom name is changing, since dataSpan * is always 1 for bedData. As above, this may change in * the future if each bed line specification is talking * about a different feature. */ if (variableStep || fixedStep || (bedData && ((prevChromName != (char *)NULL) && differentWord(prevChromName,words[0])))) { output_row(); validLines = 0; /* to cause reset for first offset */ } dataSpan = 1; /* default bases spanned per data point */ variableStep = FALSE; bedData = TRUE; fixedStep = FALSE; freez(&chromName); chromName=cloneString(words[0]); freez(&featureName); featureName=cloneString(words[0]); bedChromStart = sqlLongLong(words[1]); bedChromEnd = sqlLongLong(words[2]); bedDataValue = sqlDouble(words[3]); /* the bed format coordinate system is zero relative, half-open, * hence, no adjustment of bedChromStart is needed here, unlike the * fixed and variable step formats which will subtract one from the * incoming coordinate. */ if (bedChromStart >= bedChromEnd) errAbort("Found chromStart >= chromEnd at line %lu (%llu > %llu)", lineCount, bedChromStart, bedChromEnd); if (bedChromEnd > (bedChromStart + 10000000)) errAbort("Limit of 10,000,000 length specification for bed format at line %lu, found: %llu)", lineCount, bedChromEnd-bedChromStart); if ((validLines > 0) && (bedChromStart < previousOffset)) errAbort("chrom positions not in numerical order at line %lu. 
previous: %llu > %llu <-current (bed)", lineCount, previousOffset, bedChromStart); freez(&prevChromName); prevChromName = cloneString(chromName); } /* We must be in one of these data formats at this point */ if (!(variableStep || fixedStep || bedData)) errAbort("at the line beginning: %s, variableStep or fixedStep data declaration not found or BED data 4 column format not recognized.", words[0]); if (variableStep && (wordCount != 2)) errAbort("Expecting two words for variableStep data at line %lu, found %d", lineCount, wordCount); if (fixedStep && (wordCount != 1)) errAbort("Expecting one word for fixedStep data at line %lu, found %d", lineCount, wordCount); if (bedData && (wordCount != 4)) errAbort("Expecting four words for bed format data at line %lu, found %d", lineCount, wordCount); ++validLines; /* counting good lines of data input */ /* Offset is the incoming specified position for this value, * fixedStart has already been converted to zero * relative half open */ if (variableStep) { Offset = sqlLongLong(words[0]); Offset = BASE_0(Offset); /* zero relative half open */ dataValue = sqlDouble(words[1]); } else if (fixedStep) { Offset = fixedStart + (stepSize * (validLines - 1)); dataValue = sqlDouble(words[0]); } else if (bedData) { Offset = bedChromStart; dataValue = bedDataValue; } if (dataValue > overallUpperLimit) overallUpperLimit = dataValue; if (dataValue < overallLowerLimit) overallLowerLimit = dataValue; /* see if this is the first time through, establish chromStart */ if (validLines == 1) { chromStart = Offset; verbose(2, "first offset: %llu\n", chromStart); } else if ((validLines > 1) && (Offset <= previousOffset)) errAbort("chrom positions not in numerical order at line %lu. 
previous: %llu > %llu " "<-current (offset)", lineCount, BASE_1(previousOffset), BASE_1(Offset)); /* if we are working on a zoom level and the data is not exactly * spaced according to the span, then we need to put each value * in its own row in order to keep positioning correct for these * data values. The number of skipped bases has to be an even * multiple of dataSpan */ readingFrameSlipped = FALSE; if ((validLines > 1) && (dataSpan > 1)) { unsigned long long prevEnd = previousOffset + dataSpan; int skippedBases; int spansSkipped; skippedBases = Offset - previousOffset; if (flagOverlapSpanData && (prevEnd > Offset)) errAbort("ERROR: data points overlapping at input line %lu.\n" "previous data point position: %s:%llu-%llu overlaps current: %s:%llu-%llu", lineCount, chromName, BASE_1(previousOffset), prevEnd, chromName, BASE_1(Offset),Offset+dataSpan); spansSkipped = skippedBases / dataSpan; if ((spansSkipped * dataSpan) != skippedBases) readingFrameSlipped = TRUE; } if (readingFrameSlipped) { verbose(2, "data not spanning %llu bases, prev: %llu, this: %llu, at line: %lu\n", dataSpan, previousOffset, Offset, lineCount); output_row(); chromStart = Offset; /* a full reset here */ } /* Check to see if data is being skipped */ else if ( (validLines > 1) && (Offset > (previousOffset + dataSpan)) ) { unsigned long long off; unsigned long long fillSize; /* number of bytes */ verbose(2, "missing data offsets: %llu - %llu\n", BASE_1(previousOffset),BASE_0(Offset)); /* If we are just going to fill the rest of this bin with * no data, then may as well stop here. No need to fill * it with nothing. 
*/ fillSize = (Offset - (previousOffset + dataSpan)) / dataSpan; verbose(2, "filling NO_DATA for %llu bytes\n", fillSize); if (fillSize + bincount >= binsize) { verbose(2, "completing a bin due to NO_DATA for %llu bytes, only %llu - %llu = %llu to go\n", fillSize, binsize, bincount, binsize - bincount); verbose(2, "Offset: %llu, previousOffset: %llu\n", Offset, previousOffset); output_row(); chromStart = Offset; /* a full reset here */ } else { fillSize = 0; /* fill missing data with NO_DATA indication */ for (off = previousOffset + dataSpan; off < Offset; off += dataSpan) { ++fillSize; ++fileOffset; ++bincount; /* count scores in this bin */ if (bincount >= binsize) break; } verbose(2, "filled NO_DATA for %llu bytes\n", fillSize); /* If that finished off this bin, output it * This most likely should not happen here. The * check above: if (fillSize + bincount >= binsize) * should have caught this case already. */ if (bincount >= binsize) { output_row(); chromStart = Offset; /* a full reset here */ } } } /* With perhaps the missing data taken care of, back to the * real data. */ if (bedData) { unsigned long long bedSize = bedChromEnd - bedChromStart; for ( ; bedSize > 0; --bedSize ) { setDataValue(bedDataValue); Offset += 1; } Offset -= 1; /* loop above increments this one too much. * This Offset is supposed to be the last * valid chrom position written, not the * next to be written */ } else { setDataValue(dataValue); } previousOffset = Offset; /* remember position for gap calculations */ } /* reading file input loop end */ /* Done with input file, any data points left in this bin ? */ if (bincount) output_row(); lineFileClose(&lf); fclose(binout); fclose(wigout); freez(&chromName); freez(&featureName); freez(&data_values); freez(&validData); freez(&wibFileName); /* return limits if pointers are given */ if (upperLimit) *upperLimit = overallUpperLimit; if (lowerLimit) *lowerLimit = overallLowerLimit; if (wibSizeLimit > 0) options->wibSizeLimit = wibSize; }
struct edwQaWigSpot *edwQaWigSpotFromNextRa(struct lineFile *lf, struct raToStructReader *reader) /* Return next stanza put into an edwQaWigSpot. */ { enum fields { spotRatioField, enrichmentField, basesInGenomeField, basesInSpotsField, sumSignalField, spotSumSignalField, }; if (!raSkipLeadingEmptyLines(lf, NULL)) return NULL; struct edwQaWigSpot *el; AllocVar(el); bool *fieldsObserved = reader->fieldsObserved; bzero(fieldsObserved, reader->fieldCount); char *tag, *val; while (raNextTagVal(lf, &tag, &val, NULL)) { struct hashEl *hel = hashLookup(reader->fieldIds, tag); if (hel != NULL) { int id = ptToInt(hel->val); if (fieldsObserved[id]) errAbort("Duplicate tag %s line %d of %s\n", tag, lf->lineIx, lf->fileName); fieldsObserved[id] = TRUE; switch (id) { case spotRatioField: { el->spotRatio = sqlDouble(val); break; } case enrichmentField: { el->enrichment = sqlDouble(val); break; } case basesInGenomeField: { el->basesInGenome = sqlLongLong(val); break; } case basesInSpotsField: { el->basesInSpots = sqlLongLong(val); break; } case sumSignalField: { el->sumSignal = sqlDouble(val); break; } case spotSumSignalField: { el->spotSumSignal = sqlDouble(val); break; } default: internalErr(); break; } } } raToStructReaderCheckRequiredFields(reader, lf); return el; }
struct submitFileRow *submitFileRowFromFieldedTable( struct sqlConnection *conn, struct fieldedTable *table, int fileIx, int md5Ix, int sizeIx, int modifiedIx, int replacesIx, int replaceReasonIx) /* Turn parsed out table (still all just strings) into list of edwFiles. */ { struct submitFileRow *sfr, *sfrList = NULL; struct edwFile *bf; struct fieldedRow *fr; struct dyString *tags = dyStringNew(0); char *ucscDbTag = "ucsc_db"; int ucscDbField = stringArrayIx(ucscDbTag, table->fields, table->fieldCount); for (fr = table->rowList; fr != NULL; fr = fr->next) { char **row = fr->row; AllocVar(bf); bf->submitFileName = cloneString(row[fileIx]); safef(bf->md5, sizeof(bf->md5), "%s", row[md5Ix]); bf->size = sqlLongLong(row[sizeIx]); bf->updateTime = sqlLongLong(row[modifiedIx]); /* Add as tags any fields not included in fixed fields. */ dyStringClear(tags); int i; for (i=0; i<table->fieldCount; ++i) { if (i != fileIx && i != md5Ix && i != sizeIx && i != modifiedIx) { cgiEncodeIntoDy(table->fields[i], row[i], tags); } } if (ucscDbField < 0) { /* Try to make this field up from file name */ char *slash = strchr(bf->submitFileName, '/'); if (slash == NULL) errAbort("Can't make up '%s' field from '%s'", ucscDbTag, bf->submitFileName); int len = slash - bf->submitFileName; char ucscDbVal[len+1]; memcpy(ucscDbVal, bf->submitFileName, len); ucscDbVal[len] = 0; /* Do a little check on it */ if (!sameString("mm9", ucscDbVal) && !sameString("mm10", ucscDbVal) && !sameString("dm3", ucscDbVal) && !sameString("ce10", ucscDbVal) && !sameString("hg19", ucscDbVal)) errAbort("Unrecognized ucsc_db %s - please arrange files so that the top " "level directory in the fileName in the manifest is a UCSC database name " "like 'hg19' or 'mm10.' Alternatively please include a ucsc_db column.", ucscDbVal); /* Add it to tags. */ cgiEncodeIntoDy(ucscDbTag, ucscDbVal, tags); } bf->tags = cloneString(tags->string); /* Fake other fields. 
*/ bf->edwFileName = cloneString(""); /* Allocate wrapper structure */ AllocVar(sfr); sfr->file = bf; /* fill in fields about replacement maybe */ if (replacesIx != -1) { char *replacesAcc = row[replacesIx]; char *reason = row[replaceReasonIx]; int fileId = edwFileIdForLicensePlate(conn, replacesAcc); if (fileId == 0) errAbort("%s in %s column doesn't exist in warehouse", replacesAcc, replacesTag); sfr->replaces = cloneString(replacesAcc); sfr->replaceReason = cloneString(reason); sfr->replacesFile = fileId; } slAddHead(&sfrList, sfr); } slReverse(&sfrList); dyStringFree(&tags); return sfrList; }
void dbTrash(char *db)
/* dbTrash - drop tables from a database older than specified N hours. */
{
char query[256];
struct sqlResult *sr;
char **row;
int updateTimeIx;	/* column indexes into 'show table status' rows, */
int createTimeIx;	/* set up by the STATUS_INIT macro */
int dataLengthIx;
int indexLengthIx;
int nameIx;
int timeIxUsed;		/* whichever of update/create time was parseable */
unsigned long long totalSize = 0;
// expiredTableNames: table exists and is in metaInfo and subject to age limits
struct slName *expiredTableNames = NULL;
struct slName *lostTables = NULL;	// tables existing but not in metaInfo
unsigned long long lostTableCount = 0;
struct hash *expiredHash = newHash(10);	// as determined by metaInfo
struct hash *notExpiredHash = newHash(10);
struct sqlConnection *conn = sqlConnect(db);

if (extFileCheck)
    checkExtFile(conn);

time_t ageSeconds = (time_t)(ageHours * 3600);	/* age in seconds */

/* Load table names from metaInfo, keyed by lastUse timestamp, split into
 * expired (older than the age limit) and not expired. */
sqlSafef(query,sizeof(query),"select name,UNIX_TIMESTAMP(lastUse) from %s WHERE "
    "lastUse < DATE_SUB(NOW(), INTERVAL %ld SECOND);", CT_META_INFO,ageSeconds);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    hashAddInt(expiredHash, row[0], sqlSigned(row[1]));
sqlFreeResult(&sr);
sqlSafef(query,sizeof(query),"select name,UNIX_TIMESTAMP(lastUse) from %s WHERE "
    "lastUse >= DATE_SUB(NOW(), INTERVAL %ld SECOND);",CT_META_INFO,ageSeconds);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    hashAddInt(notExpiredHash, row[0], sqlSigned(row[1]));
sqlFreeResult(&sr);

if (tableStatus)	// show table status is very expensive, use only when asked
    {
    /* run through the table status business to get table size information */
    sqlSafef(query,sizeof(query),"show table status");
    STATUS_INIT;	/* NOTE: macro runs the query and sets the *Ix column indexes */
    while ((row = sqlNextRow(sr)) != NULL)
	{
	/* if not doing history too, and this is the history table, next row */
	if ((!historyToo) && (sameWord(row[nameIx],"history")))
	    continue;
	/* also skip the metaInfo table */
	if ((!historyToo) && (sameWord(row[nameIx],CT_META_INFO)))
	    continue;
	/* don't delete the extFile table */
	if (sameWord(row[nameIx],CT_EXTFILE))
	    continue;
	SCAN_STATUS;	/* NOTE: macro parses this row's times into 'timep' */
	if (hashLookup(expiredHash,row[nameIx]))
	    {
	    slNameAddHead(&expiredTableNames, row[nameIx]);
	    verbose(3,"%s %ld drop %s\n",row[timeIxUsed],
		(unsigned long)timep, row[nameIx]);
	    /* If sizes are non-NULL, add them up */
	    if ( ((char *)NULL != row[dataLengthIx]) &&
		((char *)NULL != row[indexLengthIx]) )
		totalSize += sqlLongLong(row[dataLengthIx])
		    + sqlLongLong(row[indexLengthIx]);
	    hashRemove(expiredHash, row[nameIx]);
	    }
	else
	    {
	    if (hashLookup(notExpiredHash,row[nameIx]))
		verbose(3,"%s %ld OK %s\n",row[timeIxUsed],
		    (unsigned long)timep, row[nameIx]);
	    else
		{
		/* table exists, but not in metaInfo, is it old enough ? */
		if (timep < dropTime)
		    {
		    slNameAddHead(&expiredTableNames, row[nameIx]);
		    verbose(2,"%s %ld dropt %s lost table\n",
			row[timeIxUsed], (unsigned long)timep, row[nameIx]);
		    /* If sizes are non-NULL, add them up */
		    if ( ((char *)NULL != row[dataLengthIx]) &&
			((char *)NULL != row[indexLengthIx]) )
			totalSize += sqlLongLong(row[dataLengthIx])
			    + sqlLongLong(row[indexLengthIx]);
		    }
		else
		    verbose(3,"%s %ld OKt %s\n",row[timeIxUsed],
			(unsigned long)timep, row[nameIx]);
		}
	    }
	}
    sqlFreeResult(&sr);
    }
else
    {	// simple 'show tables' is more efficient than 'show table status'
    sqlSafef(query,sizeof(query),"show tables");
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
	{
	if (hashLookup(expiredHash,row[0]))
	    {
	    slNameAddHead(&expiredTableNames, row[0]);
	    time_t lastUse = (time_t)hashIntVal(expiredHash,row[0]);
	    struct tm *lastUseTm = localtime(&lastUse);
	    verbose(3,"%4d-%02d-%02d %02d:%02d:%02d %ld drop %s\n",
		lastUseTm->tm_year+1900, lastUseTm->tm_mon+1,
		lastUseTm->tm_mday, lastUseTm->tm_hour, lastUseTm->tm_min,
		lastUseTm->tm_sec, (unsigned long)lastUse,row[0]);
	    hashRemove(expiredHash, row[0]);
	    }
	else if (hashLookup(notExpiredHash,row[0]))
	    {
	    time_t lastUse = (time_t)hashIntVal(notExpiredHash,row[0]);
	    struct tm *lastUseTm = localtime(&lastUse);
	    verbose(3,"%4d-%02d-%02d %02d:%02d:%02d %ld OK %s\n",
		lastUseTm->tm_year+1900, lastUseTm->tm_mon+1,
		lastUseTm->tm_mday, lastUseTm->tm_hour, lastUseTm->tm_min,
		lastUseTm->tm_sec, (unsigned long)lastUse,row[0]);
	    }
	else
	    {
	    /* table exists but is unknown to metaInfo - remember it */
	    struct slName *el = slNameNew(row[0]);
	    slAddHead(&lostTables, el);
	    }
	}
    sqlFreeResult(&sr);
    lostTableCount = slCount(lostTables);
    // If tables exist, but not in metaInfo, check their age to expire them.
    // It turns out even this show table status is slow too, so, only
    // run thru it if asked to eliminate lost tables.  It is better to
    // do this operation with the stand-alone perl script on the customTrash
    // database machine.
    if (delLostTable && lostTables)
	{
	struct slName *el;
	for (el = lostTables; el != NULL; el = el->next)
	    {
	    /* never consider the bookkeeping tables for dropping */
	    if (sameWord(el->name,"history"))
		continue;
	    if (sameWord(el->name,CT_META_INFO))
		continue;
	    if (sameWord(el->name,CT_EXTFILE))
		continue;
	    boolean oneTableOnly = FALSE;	// protect against multiple tables
	    /* get table time information to see if it is expired */
	    sqlSafef(query,sizeof(query),"show table status like '%s'", el->name);
	    STATUS_INIT;	/* macro: runs query, sets column indexes */
	    while ((row = sqlNextRow(sr)) != NULL)
		{
		if (oneTableOnly)
		    errAbort("ERROR: query: '%s' returned more than one table "
			"name\n", query);
		else
		    oneTableOnly = TRUE;
		if (differentWord(row[nameIx], el->name))
		    errAbort("ERROR: query: '%s' did not return table name '%s' != '%s'\n", query, el->name, row[nameIx]);
		SCAN_STATUS;	/* macro: parses this row's times into 'timep' */
		if (timep < dropTime)
		    {
		    slNameAddHead(&expiredTableNames, row[nameIx]);
		    verbose(2,"%s %ld dropt %s lost table\n",
			row[timeIxUsed], (unsigned long)timep, row[nameIx]);
		    }
		else
		    verbose(3,"%s %ld OKt %s\n",
			row[timeIxUsed], (unsigned long)timep, row[nameIx]);
		}
	    sqlFreeResult(&sr);
	    }
	}
    }

/* perhaps the table was already dropped, but not from the metaInfo */
struct hashEl *elList = hashElListHash(expiredHash);
struct hashEl *el;
for (el = elList; el != NULL; el = el->next)
    {
    verbose(2,"%s exists in %s only\n", el->name, CT_META_INFO);
    if (drop)
	ctTouchLastUse(conn, el->name, FALSE);	/* removes metaInfo row */
    }

if (drop)
    {
    char comment[256];
    if (expiredTableNames)
	{
	struct slName *el;
	int droppedCount = 0;
	/* customTrash DB user permissions do not have permissions to
	 * drop tables. Must use standard special user that has all
	 * permissions.  If we are not using the standard user at this
	 * point, then switch to it. */
	if (sameWord(db,CUSTOM_TRASH))
	    {
	    sqlDisconnect(&conn);
	    conn = sqlConnect(db);
	    }
	for (el = expiredTableNames; el != NULL; el = el->next)
	    {
	    verbose(2,"# drop %s\n", el->name);
	    sqlDropTable(conn, el->name);
	    ctTouchLastUse(conn, el->name, FALSE);	/* removes metaInfo row */
	    ++droppedCount;
	    }
	/* add a comment to the history table and finish up connection */
	if (tableStatus)
	    safef(comment, sizeof(comment), "Dropped %d tables with "
		"total size %llu, %llu lost tables",
		droppedCount, totalSize, lostTableCount);
	else
	    safef(comment, sizeof(comment),
		"Dropped %d tables, no size info, %llu lost tables",
		droppedCount, lostTableCount);
	verbose(2,"# %s\n", comment);
	hgHistoryComment(conn, "%s", comment);
	}
    else
	{
	safef(comment, sizeof(comment),
	    "Dropped no tables, none expired, %llu lost tables",
	    lostTableCount);
	verbose(2,"# %s\n", comment);
	}
    }
else
    {
    /* report-only mode: say what would have been dropped */
    char comment[256];
    if (expiredTableNames)
	{
	int droppedCount = slCount(expiredTableNames);
	if (tableStatus)
	    safef(comment, sizeof(comment), "Would have dropped %d tables with "
		"total size %llu, %llu lost tables",
		droppedCount, totalSize, lostTableCount);
	else
	    safef(comment, sizeof(comment),
		"Would have dropped %d tables, no size info, %llu lost tables",
		droppedCount, lostTableCount);
	verbose(2,"# %s\n", comment);
	}
    else
	{
	safef(comment, sizeof(comment),
	    "Would have dropped no tables, none expired, %llu lost tables",
	    lostTableCount);
	verbose(2,"# %s\n", comment);
	}
    }
sqlDisconnect(&conn);
}
void dbTrash(char *db) /* dbTrash - drop tables from a database older than specified N hours. */ { char query[256]; struct sqlResult *sr; char **row; int updateTimeIx; int createTimeIx; int dataLengthIx; int indexLengthIx; int nameIx; int timeIxUsed; unsigned long long totalSize = 0; struct slName *tableNames = NULL; /* subject to age limits */ struct hash *expiredHash = newHash(10); struct hash *notExpiredHash = newHash(10); struct sqlConnection *conn = sqlConnect(db); if (extFileCheck) checkExtFile(conn); time_t ageSeconds = (time_t)(ageHours * 3600); /* age in seconds */ safef(query,sizeof(query),"select name from %s WHERE " "lastUse < DATE_SUB(NOW(), INTERVAL %ld SECOND);", CT_META_INFO,ageSeconds); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) hashAddInt(expiredHash, row[0], 1); sqlFreeResult(&sr); safef(query,sizeof(query),"select name from %s WHERE " "lastUse >= DATE_SUB(NOW(), INTERVAL %ld SECOND);",CT_META_INFO,ageSeconds); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) hashAddInt(notExpiredHash, row[0], 1); sqlFreeResult(&sr); /* run through the table status business to get table size information */ safef(query,sizeof(query),"show table status"); sr = sqlGetResult(conn, query); nameIx = sqlFieldColumn(sr, "Name"); createTimeIx = sqlFieldColumn(sr, "Create_time"); updateTimeIx = sqlFieldColumn(sr, "Update_time"); dataLengthIx = sqlFieldColumn(sr, "Data_length"); indexLengthIx = sqlFieldColumn(sr, "Index_length"); while ((row = sqlNextRow(sr)) != NULL) { struct tm tm; time_t timep = 0; /* if not doing history too, and this is the history table, next row */ if ((!historyToo) && (sameWord(row[nameIx],"history"))) continue; /* also skip the metaInfo table */ if ((!historyToo) && (sameWord(row[nameIx],CT_META_INFO))) continue; /* don't delete the extFile table */ if (sameWord(row[nameIx],CT_EXTFILE)) continue; /* Update_time is sometimes NULL on MySQL 5 * so if it fails, then check the Create_time */ timeIxUsed = 
updateTimeIx; if ((row[updateTimeIx] != NULL) && (sscanf(row[updateTimeIx], "%4d-%2d-%2d %2d:%2d:%2d", &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday), &(tm.tm_hour), &(tm.tm_min), &(tm.tm_sec)) != 6) ) { timeIxUsed = createTimeIx; if (sscanf(row[createTimeIx], "%4d-%2d-%2d %2d:%2d:%2d", &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday), &(tm.tm_hour), &(tm.tm_min), &(tm.tm_sec)) != 6) { verbose(2,"%s %s %s\n", row[createTimeIx],row[updateTimeIx],row[nameIx]); errAbort("could not parse date %s or %s on table %s\n", row[createTimeIx], row[updateTimeIx], row[nameIx]); } } tm.tm_year -= 1900; tm.tm_mon -= 1; tm.tm_isdst = -1; /* do not know timezone, figure it out */ timep = mktime(&tm); if (hashLookup(expiredHash,row[nameIx])) { slNameAddHead(&tableNames, row[nameIx]); verbose(3,"%s %ld drop %s\n",row[timeIxUsed], (unsigned long)timep, row[nameIx]); /* If sizes are non-NULL, add them up */ if ( ((char *)NULL != row[dataLengthIx]) && ((char *)NULL != row[indexLengthIx]) ) totalSize += sqlLongLong(row[dataLengthIx]) + sqlLongLong(row[indexLengthIx]); hashRemove(expiredHash, row[nameIx]); } else { if (hashLookup(notExpiredHash,row[nameIx])) verbose(3,"%s %ld OK %s\n",row[timeIxUsed], (unsigned long)timep, row[nameIx]); else { /* table exists, but not in metaInfo, is it old enough ? 
*/ if (timep < dropTime) { slNameAddHead(&tableNames, row[nameIx]); verbose(2,"%s %ld dropt %s\n", row[timeIxUsed], (unsigned long)timep, row[nameIx]); /* If sizes are non-NULL, add them up */ if ( ((char *)NULL != row[dataLengthIx]) && ((char *)NULL != row[indexLengthIx]) ) totalSize += sqlLongLong(row[dataLengthIx]) + sqlLongLong(row[indexLengthIx]); } else verbose(3,"%s %ld OKt %s\n",row[timeIxUsed], (unsigned long)timep, row[nameIx]); } } } sqlFreeResult(&sr); /* perhaps the table was already dropped, but not from the metaInfo */ struct hashEl *elList = hashElListHash(expiredHash); struct hashEl *el; for (el = elList; el != NULL; el = el->next) { verbose(2,"%s exists in %s only\n", el->name, CT_META_INFO); if (drop) ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */ } if (drop) { if (tableNames) { char comment[256]; struct slName *el; int droppedCount = 0; /* customTrash DB user permissions do not have permissions to * drop tables. Must use standard special user that has all * permissions. If we are not using the standard user at this * point, then switch to it. */ if (sameWord(db,CUSTOM_TRASH)) { sqlDisconnect(&conn); conn = sqlConnect(db); } for (el = tableNames; el != NULL; el = el->next) { verbose(2,"# drop %s\n", el->name); sqlDropTable(conn, el->name); ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */ ++droppedCount; } /* add a comment to the history table and finish up connection */ safef(comment, sizeof(comment), "Dropped %d tables with total size %llu", droppedCount, totalSize); verbose(2,"# %s\n", comment); hgHistoryComment(conn, comment); } } sqlDisconnect(&conn); }