Esempio n. 1
0
long long currentVmPeak()
/* Return the number found on the VmPeak line of /proc/<pid>/status for this
 * process, exactly as it appears in the file (the unit there seems to always
 * be kB).  Returns 0 if the status file can't be opened or if no VmPeak line
 * with a value is found. */
{
long long vmPeak = 0;
char statusPath[256];
safef(statusPath, sizeof(statusPath), "/proc/%d/status", (int) getpid());
struct lineFile *lf = lineFileMayOpen(statusPath, TRUE);
if (lf == NULL)
    return vmPeak;

char *line;
while (lineFileNextReal(lf, &line))
    {
    // typical line: 'VmPeak:     62646196 kB'
    if (stringIn("VmPeak", line))
        {
        char *words[3];
        int wordCount = chopByWhite(line, words, 3);
        // The value is the 2nd word.  Only parse it when the line really has
        // one, otherwise words[1] would be read uninitialized.
        if (wordCount >= 2)
            vmPeak = sqlLongLong(words[1]);
        break;
        }
    }
lineFileClose(&lf);
return vmPeak;
}
Esempio n. 2
0
static boolean singleFilter(struct annoFilter *filter, char **row, int rowSize)
/* Apply one filter, using either filterFunc or type-based filter on column value.
 * Return TRUE if isExclude and filter passes, or if !isExclude and filter fails. */
{
boolean fail = FALSE;
if (filter->filterFunc != NULL)
    fail = filter->filterFunc(filter, row, rowSize);
else if (filter->op == afMatch)
    fail = !wildMatch((char *)(filter->values), row[filter->columnIx]);
else if (filter->op == afNotMatch)
    fail = wildMatch((char *)(filter->values), row[filter->columnIx]);
else
    {
    // column is a number -- integer or floating point?
    enum asTypes type = filter->type;
    if (asTypesIsFloating(type))
	fail = annoFilterDouble(filter, sqlDouble(row[filter->columnIx]));
    else if (asTypesIsInt(type))
	fail = annoFilterLongLong(filter, sqlLongLong(row[filter->columnIx]));
    else
	errAbort("annoFilterRowFails: unexpected enum asTypes %d for numeric filter op %d",
		 type, filter->op);
    }
if ((filter->isExclude && !fail) || (!filter->isExclude && fail))
    return TRUE;
return FALSE;
}
Esempio n. 3
0
void sqlLongLongStaticArray(char *s, long long **retArray, int *retSize)
/* Parse the comma separated numbers in s into a static buffer that is reused
 * (and so overwritten) by the next call; the caller must not free it.  The
 * commas in s are replaced with zeroes along the way.  The buffer and the
 * number of values parsed are returned in *retArray and *retSize. */
{
static long long *array = NULL;
static unsigned alloc = 0;
unsigned count = 0;

while (s != NULL && s[0] != 0)
    {
    char *next = strchr(s, ',');
    if (next != NULL)
        *next++ = 0;
    if (count >= alloc)
        {
        /* Start at 64 slots and double each time the buffer fills up. */
        alloc = (alloc == 0) ? 64 : (alloc << 1);
        ExpandArray(array, count, alloc);
        }
    array[count++] = sqlLongLong(s);
    s = next;
    }
*retSize = count;
*retArray = array;
}
Esempio n. 4
0
void recordIntoHistory(struct sqlConnection *conn, unsigned id, char *table, boolean success)
/* Record success/failure of one attempt in the row of table with the given id:
 * shift historyBits left by one and put the new status in the low bit (1 for
 * success), add one to openSuccesses or openFails, and set lastOkTime or
 * lastNotOkTime to edwNow().  Calls internalErr() if the row can't be read. */
{
/* Get historyBits and fold status into it. */
char quickResult[32];
char query[256];
sqlSafef(query, sizeof(query), "select historyBits from %s where id=%u", table, id);
if (sqlQuickQuery(conn, query, quickResult, sizeof(quickResult)) == NULL)
    internalErr();

/* Do the shift in unsigned arithmetic.  Left shifting a signed value is
 * undefined once the high bits of the history are in use. */
unsigned long long shifted = ((unsigned long long)sqlLongLong(quickResult)) << 1;
char *lastTimeField;
char *openResultField;
if (success)
    {
    shifted |= 1;
    lastTimeField = "lastOkTime";
    openResultField = "openSuccesses";
    }
else
    {
    lastTimeField = "lastNotOkTime";
    openResultField = "openFails";
    }
long long historyBits = (long long)shifted;

sqlSafef(query, sizeof(query), 
    "update %s set historyBits=%lld, %s=%s+1, %s=%lld "
    "where id=%lld",
    table, historyBits, openResultField, openResultField, lastTimeField, edwNow(),
    (long long)id);
sqlUpdate(conn, query);
}
Esempio n. 5
0
boolean checkMaxTableSizeExceeded(char *table)
/* Read Data_length and Data_free for table from 'show table status'.  If
 * Data_length in whole GB has reached squealSize, print a warning and hand it
 * to the mail command for the notification list.  Return TRUE if the limit was
 * reached.  errAborts if the status row or either column can't be found. */
{
char query[256];
sqlSafef(query, sizeof(query), "show table status like '%s'", table );
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = sqlNextRow(sr);
if (row == NULL)
    errAbort("error fetching table status");

int lengthCol = sqlFieldColumn(sr, "Data_length");
if (lengthCol == -1)
    errAbort("error finding field 'Data_length' in show table status resultset");
long long dataLength = sqlLongLong(row[lengthCol]);

int freeCol = sqlFieldColumn(sr, "Data_free");
if (freeCol == -1)
    errAbort("error finding field 'Data_free' in show table status resultset");
long long dataFree = sqlLongLong(row[freeCol]);

verbose(1, "%s: Data_length=%lld Data_free=%lld\n\n", table, dataLength, dataFree);

boolean squealed = FALSE;
if ((dataLength / (1024 * 1024 * 1024)) >= squealSize)
    {
    char *emailList = "[email protected] [email protected] [email protected]";
    char msg[256];
    safef(msg, sizeof(msg), "BIG HGCENTRAL TABLE %s data_length: %lld data_free: %lld\n",
        table, dataLength, dataFree);
    printf("%s", msg);

    /* The message is passed to mail through the shell. */
    char cmdLine[256];
    safef(cmdLine, sizeof(cmdLine),
        "echo '%s'|mail -s 'WARNING hgcentral cleanup detected data_length max size %d GB exceeded' %s",
        msg, squealSize, emailList);
    system(cmdLine);
    squealed = TRUE;
    }
sqlFreeResult(&sr);
return squealed;
}
Esempio n. 6
0
void tableStatusStaticLoad(char **row, struct tableStatus *ret)
/* Fill in ret from the 15 columns of a tableStatus row.  String fields of ret
 * are set to point at the strings in row itself, not at copies.  The contents
 * of ret will be replaced at the next call to this function. */
{
/* Column positions within row. */
enum
    {
    nameCol, typeCol, rowFormatCol, rowsCol, aveRowLengthCol,
    dataLengthCol, maxDataLengthCol, indexLengthCol, dataFreeCol,
    autoIncrementCol, createTimeCol, updateTimeCol, checkTimeCol,
    createOptionsCol, commentCol,
    };

ret->name = row[nameCol];
ret->type = row[typeCol];
ret->rowFormat = row[rowFormatCol];

/* Numeric columns are parsed from their text form. */
ret->rows = sqlUnsigned(row[rowsCol]);
ret->aveRowLength = sqlUnsigned(row[aveRowLengthCol]);
ret->dataLength = sqlLongLong(row[dataLengthCol]);
ret->maxDataLength = sqlLongLong(row[maxDataLengthCol]);
ret->indexLength = sqlLongLong(row[indexLengthCol]);
ret->dataFree = sqlLongLong(row[dataFreeCol]);

ret->autoIncrement = row[autoIncrementCol];
ret->createTime = row[createTimeCol];
ret->updateTime = row[updateTimeCol];
ret->checkTime = row[checkTimeCol];
ret->createOptions = row[createOptionsCol];
ret->comment = row[commentCol];
}
Esempio n. 7
0
void scoredRefStaticLoad(char **row, struct scoredRef *ret)
/* Fill in ret from the 6 columns of a scoredRef row.  ret->chrom is set to
 * point at the string in row itself, not at a copy.  The contents of ret will
 * be replaced at the next call to this function. */
{
/* Column positions within row. */
enum {chromCol, chromStartCol, chromEndCol, extFileCol, offsetCol, scoreCol};

ret->chrom = row[chromCol];
ret->chromStart = sqlUnsigned(row[chromStartCol]);
ret->chromEnd = sqlUnsigned(row[chromEndCol]);
ret->extFile = sqlUnsigned(row[extFileCol]);
ret->offset = sqlLongLong(row[offsetCol]);
ret->score = atof(row[scoreCol]);
}
Esempio n. 8
0
long long edwGotFile(struct sqlConnection *conn, char *submitDir, char *submitFileName, 
    char *md5, long long size)
/* See if we already got file.  Return fileId if we do,  otherwise -1.  The
 * decision is based mostly on the MD5sum.  For short files (size of 100000 or
 * less) we also require the submitDir and submitFileName to match.  This is to
 * cover the case where you might have legitimate empty files duplicated even
 * though they were computed based on different things. For instance coming up
 * with no peaks is a legitimate result for many chip-seq experiments. */
{
/* For large files just rely on MD5. */
char query[PATH_LEN+512];
if (size > 100000)
    {
    sqlSafef(query, sizeof(query),
        "select id from edwFile where md5='%s' order by submitId desc limit 1" , md5);
    long long result = sqlQuickLongLong(conn, query);
    if (result == 0)	/* a zero result is reported to the caller as "not found" */
        result = -1;
    return result;
    }

/* Rest of the routine deals with smaller files,  which we are less worried about
 * duplicating,  and indeed expect a little duplication of the empty file if none
 * other. */

/* First see if we have even got the directory. */
sqlSafef(query, sizeof(query), "select id from edwSubmitDir where url='%s'", submitDir);
int submitDirId = sqlQuickNum(conn, query);
if (submitDirId <= 0)
    return -1;

/* The complex truth is that we may have gotten this file multiple times. 
 * We return the most recent version where it got uploaded and passed the post-upload
 * MD5 sum, and thus where the MD5 field is filled in the database. */
sqlSafef(query, sizeof(query), 
    "select md5,id from edwFile "
    "where submitFileName='%s' and submitDirId = %d and md5 != '' "
    "order by submitId desc limit 1"
    , submitFileName, submitDirId);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
/* Same width as the return type, so ids are not truncated where long is 32 bits. */
long long fileId = -1;
if ((row = sqlNextRow(sr)) != NULL)
    {
    char *dbMd5 = row[0];
    if (sameWord(md5, dbMd5))
	fileId = sqlLongLong(row[1]);
    }
sqlFreeResult(&sr);

return fileId;
}
Esempio n. 9
0
long long sqlLongLongComma(char **pS)
/* Return the number (often a 64 bit offset) at the start of *pS.  The comma
 * that ends the number is overwritten with a zero and *pS is advanced past it.
 * If there is no comma, the rest of the string is taken as the number and *pS
 * is left pointing at the terminating zero. */
{
char *s = *pS;
char *e = strchr(s, ',');

if (e != NULL)
    *e++ = 0;
else
    e = s + strlen(s);	/* no comma: avoid writing through a NULL pointer */
*pS = e;
return sqlLongLong(s);
}
Esempio n. 10
0
static void setStepSize(char *spec)
/*	given *spec: "step=<size>", parse <size> and store it in stepSize.
 *	A copy of spec is taken before chopping and used in the error
 *	message when no <size> is present. */
{
char *original = cloneString(spec);
char *parts[2];

if (chopByChar(spec, '=', parts, 2) != 2)
    errAbort("Expecting step=<size>, no <size> found in %s at line %lu",
        original, lineCount);
stepSize = sqlLongLong(parts[1]);
freeMem(original);
}
Esempio n. 11
0
struct tableStatus *tableStatusLoad(char **row)
/* Allocate a tableStatus and fill it in from the 15 columns of row, as fetched
 * with select * from tableStatus.  String fields are cloned from row.
 * Dispose of this with tableStatusFree(). */
{
/* Column positions within row. */
enum
    {
    nameCol, typeCol, rowFormatCol, rowsCol, aveRowLengthCol,
    dataLengthCol, maxDataLengthCol, indexLengthCol, dataFreeCol,
    autoIncrementCol, createTimeCol, updateTimeCol, checkTimeCol,
    createOptionsCol, commentCol,
    };
struct tableStatus *ret;

AllocVar(ret);
ret->name = cloneString(row[nameCol]);
ret->type = cloneString(row[typeCol]);
ret->rowFormat = cloneString(row[rowFormatCol]);

/* Numeric columns are parsed from their text form. */
ret->rows = sqlUnsigned(row[rowsCol]);
ret->aveRowLength = sqlUnsigned(row[aveRowLengthCol]);
ret->dataLength = sqlLongLong(row[dataLengthCol]);
ret->maxDataLength = sqlLongLong(row[maxDataLengthCol]);
ret->indexLength = sqlLongLong(row[indexLengthCol]);
ret->dataFree = sqlLongLong(row[dataFreeCol]);

ret->autoIncrement = cloneString(row[autoIncrementCol]);
ret->createTime = cloneString(row[createTimeCol]);
ret->updateTime = cloneString(row[updateTimeCol]);
ret->checkTime = cloneString(row[checkTimeCol]);
ret->createOptions = cloneString(row[createOptionsCol]);
ret->comment = cloneString(row[commentCol]);
return ret;
}
Esempio n. 12
0
struct scoredRef *scoredRefLoad(char **row)
/* Allocate a scoredRef and fill it in from the 6 columns of row, as fetched
 * with select * from scoredRef.  The chrom string is cloned from row.
 * Dispose of this with scoredRefFree(). */
{
/* Column positions within row. */
enum {chromCol, chromStartCol, chromEndCol, extFileCol, offsetCol, scoreCol};
struct scoredRef *ret;

AllocVar(ret);
ret->chrom = cloneString(row[chromCol]);
ret->chromStart = sqlUnsigned(row[chromStartCol]);
ret->chromEnd = sqlUnsigned(row[chromEndCol]);
ret->extFile = sqlUnsigned(row[extFileCol]);
ret->offset = sqlLongLong(row[offsetCol]);
ret->score = atof(row[scoreCol]);
return ret;
}
Esempio n. 13
0
static void setDataSpan(char *spec)
/*	given *spec: "span=N", parse N and store it in dataSpan.
 *	A copy of spec is taken before chopping and used in the error
 *	message when no N is present. */
{
char *original = cloneString(spec);
char *parts[2];

if (chopByChar(spec, '=', parts, 2) != 2)
    errAbort("Expecting span=N, no N found in %s at line %lu",
        original, lineCount);
dataSpan = sqlLongLong(parts[1]);
freeMem(original);
}
Esempio n. 14
0
static void brokenRefPepGetSeqScan(struct sqlConnection *conn,
                                   struct extFileTbl* extFileTbl,
                                   struct brokenRefPepTbl *brpTbl)
/* Select every gbSeq row whose acc matches NP__% or YP__% and pass its columns
 * to brokenRefPepSeqCheck() to look for seq or extFile problems, including
 * checking fasta file contents. */
{
static char *query = NOSQLINJ "select id, acc, version, size, gbExtFile, file_offset, file_size "
    "from gbSeq where (acc like \"NP__%\") or (acc like \"YP__%\")";
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;

while ((row = sqlNextRow(sr)) != NULL)
    {
    /* Columns are converted in select order before the check is called. */
    int id = sqlSigned(row[0]);
    char *acc = row[1];
    int version = sqlSigned(row[2]);
    unsigned size = sqlUnsigned(row[3]);
    unsigned extFileId = sqlUnsigned(row[4]);
    long long fileOffset = sqlLongLong(row[5]);
    unsigned fileSize = sqlUnsigned(row[6]);

    brokenRefPepSeqCheck(conn, extFileTbl, brpTbl,
                         id, acc, version, size, extFileId, fileOffset, fileSize);
    }
sqlFreeResult(&sr);
}
Esempio n. 15
0
int sqlLongLongArray(char *s, long long *array, int arraySize)
/* Convert comma separated list of numbers in s to values in array, which has
 * room for arraySize values.  Parsing stops at the end of the string or when
 * the array is full, whichever comes first.  The commas that are consumed are
 * replaced with zeroes.  Returns the number of values stored, which is 0 when
 * arraySize is zero or negative. */
{
/* Signed count compared with '<', so a negative arraySize stores nothing
 * rather than never matching and running off the end of array. */
int count = 0;

while (s != NULL && s[0] != 0 && count < arraySize)
    {
    char *e = strchr(s, ',');
    if (e != NULL)
	*e++ = 0;
    array[count++] = sqlLongLong(s);
    s = e;
    }
return count;
}
Esempio n. 16
0
static void setFixedStart(char *spec)
/*	given *spec: "start=<position>", parse <position> and store it in
 *	fixedStart after converting it with BASE_0().  A position of 0 is
 *	rejected since the first chrom position is 1.  A copy of spec is
 *	taken before chopping and used in the error message when no
 *	<position> is present. */
{
char *original = cloneString(spec);
char *parts[2];

if (chopByChar(spec, '=', parts, 2) != 2)
    errAbort("Expecting start=<position>, no <position> found in %s at line %lu",
        original, lineCount);
fixedStart = sqlLongLong(parts[1]);
if (fixedStart != 0)
    fixedStart = BASE_0(fixedStart);	/* zero relative half-open */
else
    errAbort("Found start=0 at line %lu, the first chrom position is 1, not 0",
        lineCount);
freeMem(original);
}
Esempio n. 17
0
void cdwReallyRemoveFiles(char *email, char *submitUrl, int fileCount, char *fileIds[])
/* cdwReallyRemoveFiles - Remove files from data warehouse.  Generally you want to
 * deprecate them instead.  Every id in fileIds must be a file whose submission was
 * made by the user with this email from submitUrl; if any is not we errAbort before
 * anything is removed.
 * NOTE(review): the connection opened here is not disconnected in this function -
 * confirm that is intended. */
{
int i;

/* Parse all ids up front so a bad command line is caught before the database
 * is touched. */
long long ids[fileCount];
for (i = 0; i < fileCount; ++i)
    ids[i] = sqlLongLong(fileIds[i]);

/* Build a hash of this user's submissions from submitUrl, keyed by the ascii
 * form of the submitId. */
struct sqlConnection *conn = cdwConnectReadWrite();
struct cdwUser *user = cdwMustGetUserFromEmail(conn, email);
char query[256];
sqlSafef(query, sizeof(query), 
    " select cdwSubmit.id,cdwSubmitDir.id from cdwSubmit,cdwSubmitDir "
    " where cdwSubmit.submitDirId=cdwSubmitDir.id and userId=%d "
    " and cdwSubmitDir.url='%s' ",
    user->id, submitUrl);
struct hash *submitHash = sqlQuickHash(conn, query);

/* Check every file exists and belongs to one of those submissions. */
for (i = 0; i < fileCount; ++i)
    {
    char submitIdBuf[64];
    sqlSafef(query, sizeof(query), "select submitId from cdwFile where id=%lld", ids[i]);
    char *submitId = sqlQuickQuery(conn, query, submitIdBuf, sizeof(submitIdBuf));
    if (submitId == NULL)
        errAbort("%lld is not a fileId in the warehouse", ids[i]);
    if (hashLookup(submitHash, submitId) == NULL)
        errAbort("File ID %lld does not belong to submission set based on %s", ids[i], submitUrl);
    }

/* All checks passed, so remove each file from the tables it is in. */
for (i = 0; i < fileCount; ++i)
    cdwReallyRemoveFile(conn, ids[i], really);
}
Esempio n. 18
0
static void raInfoAdd(struct raInfoTbl *rit, struct hash *raRec,
                      char *acc, short ver, char *seqSzFld, char *offFld, char *recSzFld,
                      unsigned extFileId)
/* Add a ra mrna or pep to rit->accMap, keyed by "acc.ver".  The sequence size,
 * file offset and record size are read from raRec using the field names given
 * in seqSzFld, offFld and recSzFld.  errAborts if extFileId is 0. */
{
    struct raInfo *ri;
    char accVer[GB_ACC_BUFSZ];
    if (extFileId == 0)
        errAbort("no extFileId for %s.%d", acc, ver);
    /* extFileId is unsigned, so it is printed with %u */
    gbVerbPr(10, "raAdd %s.%d ext %u", acc, ver, extFileId);
    lmAllocVar(rit->accMap->lm, ri);
    safef(accVer, sizeof(accVer), "%s.%d", acc, ver);
    /* The entry is added to the hash before its fields are filled in below. */
    hashAdd(rit->accMap, accVer, ri);
    ri->acc = lmCloneString(rit->accMap->lm, acc);
    ri->version = ver;
    ri->size = sqlUnsigned((char*)hashMustFindVal(raRec, seqSzFld));
    ri->offset = sqlLongLong((char*)hashMustFindVal(raRec, offFld));
    ri->fileSize = sqlUnsigned((char*)hashMustFindVal(raRec, recSzFld));
    ri->extFileId = extFileId;
}
Esempio n. 19
0
void edwChangeFormat(char *format, int idCount, char *idStrings[])
/* edwChangeFormat - Change format and force a revalidation for a file.
 * idStrings holds idCount ascii file ids.  All of them are converted and looked
 * up in edwValidFile first, and we errAbort on the first one that is not found,
 * so no file is changed unless every id is good. */
{
struct sqlConnection *conn = edwConnectReadWrite();

/* Convert ascii id's to valid file records so we catch errors early. */
struct edwValidFile *vfs[idCount];
int i;
for (i=0; i<idCount; ++i)
    {
    long long id = sqlLongLong(idStrings[i]);
    vfs[i] = edwValidFileFromFileId(conn, id);
    if (vfs[i] == NULL)
        errAbort("%lld is not a fileId in the edwValidFile table", id);
    }

/* Loop through each file and change format. */
for (i=0; i<idCount; ++i)
    changeFormat(conn, vfs[i], format);
sqlDisconnect(&conn);
}
Esempio n. 20
0
void edwParseSubmitFile(struct sqlConnection *conn, char *submitLocalPath, char *submitUrl, 
    struct submitFileRow **retSubmitList)
/* Load and parse up this file as fielded table, make sure all required fields are there,
 * and calculate indexes of required fields.   This produces an edwFile list, but with
 * still quite a few fields missing - just what can be filled in from submit filled in. 
 * The submitUrl is just used for error reporting.  If it's local, just make it the
 * same as submitLocalPath. */
{
char *requiredFields[] = {"file_name", "format", "output_type", "experiment", "replicate", 
    "enriched_in", "md5_sum", "size",  "modified", "valid_key"};
struct fieldedTable *table = fieldedTableFromTabFile(submitLocalPath, submitUrl,
	requiredFields, ArraySize(requiredFields));

/* Get offsets of all required fields */
int fileIx = stringArrayIx("file_name", table->fields, table->fieldCount);
int formatIx = stringArrayIx("format", table->fields, table->fieldCount);
int outputIx = stringArrayIx("output_type", table->fields, table->fieldCount);
int experimentIx = stringArrayIx("experiment", table->fields, table->fieldCount);
int replicateIx = stringArrayIx("replicate", table->fields, table->fieldCount);
int enrichedIx = stringArrayIx("enriched_in", table->fields, table->fieldCount);
int md5Ix = stringArrayIx("md5_sum", table->fields, table->fieldCount);
int sizeIx = stringArrayIx("size", table->fields, table->fieldCount);
int modifiedIx = stringArrayIx("modified", table->fields, table->fieldCount);
int validIx = stringArrayIx("valid_key", table->fields, table->fieldCount);

/* See if we're doing replacement and check have all columns needed if so. */
int replacesIx = stringArrayIx(replacesTag, table->fields, table->fieldCount);
int replaceReasonIx = stringArrayIx(replaceReasonTag, table->fields, table->fieldCount);
boolean doReplace = (replacesIx != -1);
if (doReplace)
    if (replaceReasonIx == -1)
        errAbort("Error: got \"%s\" column without \"%s\" column in %s.", 
	    replacesTag, replaceReasonTag, submitUrl);

/* Loop through and make sure all field values are ok */
struct fieldedRow *fr;
for (fr = table->rowList; fr != NULL; fr = fr->next)
    {
    char **row = fr->row;
    char *fileName = row[fileIx];
    allGoodFileNameChars(fileName);
    char *format = row[formatIx];
    if (!isSupportedFormat(format))
	errAbort("Format %s is not supported", format);
    allGoodSymbolChars(row[outputIx]);
    char *experiment = row[experimentIx];
    if (!isExperimentId(experiment))
        errAbort("%s in experiment field does not seem to be an encode experiment", experiment);
    char *replicate = row[replicateIx];
    if (differentString(replicate, "pooled") && differentString(replicate, "n/a") )
	if (!isAllNum(replicate))
	    errAbort("%s is not a good value for the replicate column", replicate);
    char *enriched = row[enrichedIx];
    if (!encode3CheckEnrichedIn(enriched))
        errAbort("Enriched_in %s is not supported", enriched);
    char *md5 = row[md5Ix];
    if (strlen(md5) != 32 || !isAllHexLower(md5))
        errAbort("md5 '%s' is not in all lower case 32 character hexadecimal format.", md5);
    char *size = row[sizeIx];
    if (!isAllNum(size))
        errAbort("Invalid size '%s'", size);
    char *modified = row[modifiedIx];
    if (!isAllNum(modified))
        errAbort("Invalid modification time '%s'", modified);
    char *validIn = row[validIx];
    char *realValid = encode3CalcValidationKey(md5, sqlLongLong(size));
    if (!sameString(validIn, realValid))
        errAbort("The valid_key %s for %s doesn't fit", validIn, fileName);
    freez(&realValid);

    if (doReplace)
	{
	char *replaces = row[replacesIx];
	char *reason = row[replaceReasonIx];
	if (!isEmptyOrNa(replaces))
	    {
	    char *prefix = edwLicensePlateHead(conn);
	    if (!startsWith(prefix, replaces))
		errAbort("%s in replaces column is not an ENCODE file accession", replaces);
	    if (isEmptyOrNa(reason))
		errAbort("Replacing %s without a reason\n", replaces);
	    }
	}
    }

*retSubmitList = submitFileRowFromFieldedTable(conn, table, 
    fileIx, md5Ix, sizeIx, modifiedIx, replacesIx, replaceReasonIx);
}
Esempio n. 21
0
struct cdwBamFile *cdwBamFileFromNextRa(struct lineFile *lf, struct raToStructReader *reader)
/* Read the next ra stanza from lf into a newly allocated cdwBamFile.  Returns
 * NULL if raSkipLeadingEmptyLines() finds no stanza.  Tags that are not in
 * reader->fieldIds are skipped.  errAborts if a tag is seen twice, and
 * raToStructReaderCheckRequiredFields() is called once the stanza is read. */
{
/* Field ids, in the order the values stored in reader->fieldIds are
 * switched on below. */
enum fields
    {
    isPairedField,
    isSortedByTargetField,
    readCountField,
    readBaseCountField,
    mappedCountField,
    uniqueMappedCountField,
    readSizeMeanField,
    readSizeStdField,
    readSizeMinField,
    readSizeMaxField,
    u4mReadCountField,
    u4mUniquePosField,
    u4mUniqueRatioField,
    targetBaseCountField,
    targetSeqCountField,
    };
if (!raSkipLeadingEmptyLines(lf, NULL))
    return NULL;

struct cdwBamFile *el;
AllocVar(el);

/* Clear the observed flags left from the previous stanza. */
bool *fieldsObserved = reader->fieldsObserved;
bzero(fieldsObserved, reader->fieldCount);

char *tag, *val;
while (raNextTagVal(lf, &tag, &val, NULL))
    {
    struct hashEl *hel = hashLookup(reader->fieldIds, tag);
    if (hel == NULL)
        continue;

    int id = ptToInt(hel->val);
    if (fieldsObserved[id])
        errAbort("Duplicate tag %s line %d of %s\n", tag, lf->lineIx, lf->fileName);
    fieldsObserved[id] = TRUE;

    switch (id)
        {
        case isPairedField:          el->isPaired = sqlSigned(val);            break;
        case isSortedByTargetField:  el->isSortedByTarget = sqlSigned(val);    break;
        case readCountField:         el->readCount = sqlLongLong(val);         break;
        case readBaseCountField:     el->readBaseCount = sqlLongLong(val);     break;
        case mappedCountField:       el->mappedCount = sqlLongLong(val);       break;
        case uniqueMappedCountField: el->uniqueMappedCount = sqlLongLong(val); break;
        case readSizeMeanField:      el->readSizeMean = sqlDouble(val);        break;
        case readSizeStdField:       el->readSizeStd = sqlDouble(val);         break;
        case readSizeMinField:       el->readSizeMin = sqlSigned(val);         break;
        case readSizeMaxField:       el->readSizeMax = sqlSigned(val);         break;
        case u4mReadCountField:      el->u4mReadCount = sqlSigned(val);        break;
        case u4mUniquePosField:      el->u4mUniquePos = sqlSigned(val);        break;
        case u4mUniqueRatioField:    el->u4mUniqueRatio = sqlDouble(val);      break;
        case targetBaseCountField:   el->targetBaseCount = sqlLongLong(val);   break;
        case targetSeqCountField:    el->targetSeqCount = sqlUnsigned(val);    break;
        default:
            internalErr();
            break;
        }
    }

raToStructReaderCheckRequiredFields(reader, lf);
return el;
}
Esempio n. 22
0
struct edwFastqFile *edwFastqFileFromNextRa(struct lineFile *lf, struct raToStructReader *reader)
/* Return next stanza put into an edwFastqFile. */
{
enum fields
    {
    readCountField,
    baseCountField,
    readSizeMeanField,
    readSizeStdField,
    readSizeMinField,
    readSizeMaxField,
    qualMeanField,
    qualStdField,
    qualMinField,
    qualMaxField,
    qualTypeField,
    qualZeroField,
    atRatioField,
    aRatioField,
    cRatioField,
    gRatioField,
    tRatioField,
    nRatioField,
    posCountField,
    qualPosField,
    aAtPosField,
    cAtPosField,
    gAtPosField,
    tAtPosField,
    nAtPosField,
    };
if (!raSkipLeadingEmptyLines(lf, NULL))
    return NULL;

struct edwFastqFile *el;
AllocVar(el);

bool *fieldsObserved = reader->fieldsObserved;
bzero(fieldsObserved, reader->fieldCount);

char *tag, *val;
while (raNextTagVal(lf, &tag, &val, NULL))
    {
    struct hashEl *hel = hashLookup(reader->fieldIds, tag);
    if (hel != NULL)
        {
	int id = ptToInt(hel->val);
	if (fieldsObserved[id])
	     errAbort("Duplicate tag %s line %d of %s\n", tag, lf->lineIx, lf->fileName);
	fieldsObserved[id] = TRUE;
	switch (id)
	    {
	    case readCountField:
	        {
	        el->readCount = sqlLongLong(val);
		break;
	        }
	    case baseCountField:
	        {
	        el->baseCount = sqlLongLong(val);
		break;
	        }
	    case readSizeMeanField:
	        {
	        el->readSizeMean = sqlDouble(val);
		break;
	        }
	    case readSizeStdField:
	        {
	        el->readSizeStd = sqlDouble(val);
		break;
	        }
	    case readSizeMinField:
	        {
	        el->readSizeMin = sqlSigned(val);
		break;
	        }
	    case readSizeMaxField:
	        {
	        el->readSizeMax = sqlSigned(val);
		break;
	        }
	    case qualMeanField:
	        {
	        el->qualMean = sqlDouble(val);
		break;
	        }
	    case qualStdField:
	        {
	        el->qualStd = sqlDouble(val);
		break;
	        }
	    case qualMinField:
	        {
	        el->qualMin = sqlDouble(val);
		break;
	        }
	    case qualMaxField:
	        {
	        el->qualMax = sqlDouble(val);
		break;
	        }
	    case qualTypeField:
	        {
	        el->qualType = cloneString(val);
		break;
	        }
	    case qualZeroField:
	        {
	        el->qualZero = sqlSigned(val);
		break;
	        }
	    case atRatioField:
	        {
	        el->atRatio = sqlDouble(val);
		break;
	        }
	    case aRatioField:
	        {
	        el->aRatio = sqlDouble(val);
		break;
	        }
	    case cRatioField:
	        {
	        el->cRatio = sqlDouble(val);
		break;
	        }
	    case gRatioField:
	        {
	        el->gRatio = sqlDouble(val);
		break;
	        }
	    case tRatioField:
	        {
	        el->tRatio = sqlDouble(val);
		break;
	        }
	    case nRatioField:
	        {
	        el->nRatio = sqlDouble(val);
		break;
	        }
	    case posCountField:
	        {
                int arraySize = sqlSigned(val);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "posCount");
		break;
	        }
	    case qualPosField:
	        {
                int arraySize;
		sqlDoubleDynamicArray(val, &el->qualPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "qualPos");
		break;
	        }
	    case aAtPosField:
	        {
                int arraySize;
		sqlDoubleDynamicArray(val, &el->aAtPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "aAtPos");
		break;
	        }
	    case cAtPosField:
	        {
                int arraySize;
		sqlDoubleDynamicArray(val, &el->cAtPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "cAtPos");
		break;
	        }
	    case gAtPosField:
	        {
                int arraySize;
		sqlDoubleDynamicArray(val, &el->gAtPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "gAtPos");
		break;
	        }
	    case tAtPosField:
	        {
                int arraySize;
		sqlDoubleDynamicArray(val, &el->tAtPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "tAtPos");
		break;
	        }
	    case nAtPosField:
	        {
                int arraySize;
		sqlDoubleDynamicArray(val, &el->nAtPos, &arraySize);
                raToStructArraySignedSizer(lf, arraySize, &el->posCount, "nAtPos");
		break;
	        }
	    default:
	        internalErr();
		break;
	    }
	}
    }

raToStructReaderCheckRequiredFields(reader, lf);
return el;
}
Esempio n. 23
0
/*	The single externally visible routine.
 *	Future improvements will need to add a couple more arguments to
 *	satisify the needs of the command line version and its options.
 *	Currently, this is used only in customTrack input parsing.
 */
void wigAsciiToBinary( char *wigAscii, char *wigFile, char *wibFile,
   double *upperLimit, double *lowerLimit, struct wigEncodeOptions *options)
/*	given the three file names, read the ascii wigAscii file and produce
 *	the wigFile and wibFile outputs
 */
{
struct lineFile *lf;			/* for line file utilities	*/
char *line = (char *) NULL;		/* to receive data input line	*/
char *words[10];				/* to split data input line	*/
int wordCount = 0;			/* result of split	*/
int validLines = 0;			/* counting only lines with data */
double dataValue = 0.0;			/* from data input	*/
boolean bedData = FALSE;		/* in bed format data */
boolean variableStep = FALSE;		/* in variableStep data */
boolean fixedStep = FALSE;		/* in fixedStep data */
char *prevChromName = (char *)NULL;	/* to watch for chrom name changes */
int trackCount = 0;			/* We abort if we see more than one track. */

if ((wigAscii == (char *)NULL) || (wigFile == (char *)NULL) ||
    (wibFile == (char *)NULL))
	errAbort("wigAsciiToBinary: missing data file names, ascii: %s, wig: %s, wib: %s", wigAscii, wigFile, wibFile);

/*	need to be careful here and initialize all the global variables */
freez(&wibFileName);			/* send this name to the global */
wibFileName = cloneString(wibFile);	/* variable for use in output_row() */
lineCount = 0;	/* to count all lines	*/
add_offset = 0;	/* to allow "lifting" of the data */
validLines = 0;	/* to count only lines with data */
rowCount = 0;	/* to count rows output */
bincount = 0;	/* to count up to binsize	*/
binsize = 1024;	/* # of data points per table row */
dataSpan = 1;	/* default bases spanned per data point */
chromStart = 0;	/* for table row data */
previousOffset = 0;  /* for data missing detection */
fileOffset = 0;	/* current location within binary data file	*/
fileOffsetBegin = 0;/* location in binary data file where this bin starts*/
freez(&data_values);
freez(&validData);
data_values = (double *) needMem( (size_t) (binsize * sizeof(double)));
validData = (unsigned char *)
	    needMem( (size_t) (binsize * sizeof(unsigned char)));

if (options != NULL)
    {
    if (options->lift != 0)
	add_offset = options->lift;
    if (options->noOverlap)
	noOverlap = TRUE;
    if (options->flagOverlapSpanData)
	flagOverlapSpanData = TRUE;
    if (options->wibSizeLimit > 0)
	wibSizeLimit = options->wibSizeLimit;
    }

/* limits for the complete set of data, they must change from these initial
	defaults during processing */
overallLowerLimit = wigEncodeStartingLowerLimit;
overallUpperLimit = wigEncodeStartingUpperLimit;
binout = mustOpen(wibFile,"w");	/*	binary data file	*/
wigout = mustOpen(wigFile,"w");	/*	table row definition file */
#if defined(DEBUG)	/*	dbg	*/
chmod(wibFile, 0666);
chmod(wigFile, 0666);
#endif
lf = lineFileOpen(wigAscii, TRUE);	/*	input file	*/
while (lineFileNext(lf, &line, NULL))
    {
    boolean readingFrameSlipped;

    ++lineCount;
    if ((wibSizeLimit > 0) && (wibSize >= wibSizeLimit))
        errAbort("data size limit of %lld data values has been exceeded.  This data can be efficiently displayed with the <A HREF='/goldenPath/help/bigWig.html' TARGET=_blank>bigWig file format</A> in a custom track, or in a <A HREF='/goldenPath/help/hgTrackHubHelp.html' TARGET=_blank>Track Hub</A> for multiple large datasets.", wibSizeLimit);
    line = skipLeadingSpaces(line);
    /*	ignore blank or comment lines	*/
    if ((line == (char *)NULL) || (line[0] == '\0') || (line[0] == '#'))
	continue;		/*	!!! go to next line of input */

    wordCount = chopByWhite(line, words, ArraySize(words));

    if (sameWord("track",words[0]))
	{
	/* Allow (and ignore) one track line, but no more. */
	++trackCount;
	if (trackCount > 1)
	    errAbort("Multiple tracks seen, second at line %d of %s, can only handle one.",
	    	lf->lineIx, lf->fileName);
	continue;	
	}
    else if (sameWord("browser", words[0]))
        {
	continue;	/* ignore browser lines if present */
	}
    else if (sameWord("variableStep",words[0]))
	{
	int i;
	boolean foundChrom = FALSE;
	/*	safest thing to do if we were processing anything is to
	 *	output that previous block and start anew
	 *	Future improvement could get fancy here and decide if it
	 *	is really necessary to start over, although the concept
	 *	of a line between data points on one item may use this
	 *	block behavior later to define line segments, so don't
	 *	get too quick to be fancy here.  This line behavior
	 *	implies that feature names will need to be specified to
	 *	identify the line segments that belong together.
	 */
	if (variableStep || bedData || fixedStep)
	    {
	    output_row();
	    validLines = 0;	/*	to cause reset for first offset	*/
	    }
	dataSpan = 1;	/* default bases spanned per data point */
	for(i = 1; i < wordCount; ++i)
	    {
	    if (startsWith("chrom",words[i]))
		{
		setChromName(words[i]);
		foundChrom = TRUE;
		}
	    else if (startsWith("span",words[i]))
		setDataSpan(words[i]);
	    else
		errAbort("illegal specification on variableStep at line %lu: %s",
		    lineCount, words[i]);
	    }
	if (!foundChrom)
	    errAbort("missing chrom=<name> specification on variableStep declaration at line %lu", lineCount);
	variableStep = TRUE;
	bedData = FALSE;
	fixedStep = FALSE;
	freez(&prevChromName);
	prevChromName = cloneString(chromName);
	continue;		/*	!!!  go to next input line	*/
	}
    else if (sameWord("fixedStep",words[0]))
	{
	boolean foundChrom = FALSE;
	boolean foundStart = FALSE;
	int i;

	/*	same comment as above	*/
	if (variableStep || bedData || fixedStep)
	    {
	    output_row();
	    validLines = 0;	/*	to cause reset for first offset	*/
	    }
	stepSize = 1;	/*	default step size	*/
	dataSpan = 0;	/*      this will match step size if not set*/
	for(i = 1; i < wordCount; ++i)
	    {
	    if (startsWith("chrom",words[i]))
		{
		setChromName(words[i]);
		foundChrom = TRUE;
		}
	    else if (startsWith("start",words[i]))
		{
		setFixedStart(words[i]);
		foundStart = TRUE;
		}
	    else if (startsWith("step",words[i]))
		setStepSize(words[i]);
	    else if (startsWith("span",words[i]))
		setDataSpan(words[i]);
	    else
		errAbort("illegal specification on variableStep at line %lu: %s",
		    lineCount, words[i]);
	    }
	if (dataSpan == 0)
	    dataSpan = stepSize;
	if (!foundChrom)
	    errAbort("missing chrom=<name> specification on fixedStep declaration at line %lu", lineCount);
	if (!foundStart)
	    errAbort("missing start=<position> specification on fixedStep declaration at line %lu", lineCount);
	if (noOverlap && validLines && prevChromName)
	    {
	    if (sameWord(prevChromName,chromName) && (fixedStart < chromStart))
		errAbort("specified fixedStep chromStart %llu is less than expected next chromStart %llu", fixedStart, chromStart);
	    }
	variableStep = FALSE;
	bedData = FALSE;
	fixedStep = TRUE;
	freez(&prevChromName);
	prevChromName = cloneString(chromName);
	continue;		/*	!!!  go to next input line	*/
	}
    else if (wordCount == 4)
	{
	/*	while in bedData, we do not necessarily need to start a new
	 *	batch unless the chrom name is changing, since dataSpan
	 *	is always 1 for bedData.  As above, this may change in
	 *	the future if each bed line specification is talking
	 *	about a different feature.
	 */
	if (variableStep || fixedStep ||
		(bedData && ((prevChromName != (char *)NULL) &&
			differentWord(prevChromName,words[0]))))
	    {
	    output_row();
	    validLines = 0;	/*	to cause reset for first offset	*/
	    }
	dataSpan = 1;	/* default bases spanned per data point */
	variableStep = FALSE;
	bedData = TRUE;
	fixedStep = FALSE;
	freez(&chromName);
	chromName=cloneString(words[0]);
	freez(&featureName);
	featureName=cloneString(words[0]);
	bedChromStart = sqlLongLong(words[1]);
	bedChromEnd = sqlLongLong(words[2]);
	bedDataValue = sqlDouble(words[3]);
	/* the bed format coordinate system is zero relative, half-open,
	 * hence, no adjustment of bedChromStart is needed here, unlike the
	 * fixed and variable step formats which will subtract one from the
	 * incoming coordinate.
	 */
	if (bedChromStart >= bedChromEnd)
	    errAbort("Found chromStart >= chromEnd at line %lu (%llu > %llu)",
		lineCount, bedChromStart, bedChromEnd);
	if (bedChromEnd > (bedChromStart + 10000000))
	    errAbort("Limit of 10,000,000 length specification for bed format at line %lu, found: %llu)",
		lineCount, bedChromEnd-bedChromStart);
	if ((validLines > 0) && (bedChromStart < previousOffset))
	    errAbort("chrom positions not in numerical order at line %lu. previous: %llu > %llu <-current (bed)", lineCount, previousOffset, bedChromStart);
	freez(&prevChromName);
	prevChromName = cloneString(chromName);
	}

    /*	We must be in one of these data formats at this point */
    if (!(variableStep || fixedStep || bedData))
	errAbort("at the line beginning: %s, variableStep or fixedStep data declaration not found or BED data 4 column format not recognized.", words[0]); 
    if (variableStep && (wordCount != 2))
	errAbort("Expecting two words for variableStep data at line %lu, found %d",
	    lineCount, wordCount);
    if (fixedStep && (wordCount != 1))
	errAbort("Expecting one word for fixedStep data at line %lu, found %d",
	    lineCount, wordCount);
    if (bedData && (wordCount != 4))
	errAbort("Expecting four words for bed format data at line %lu, found %d",
	    lineCount, wordCount);

    ++validLines;		/*	counting good lines of data input */

    /*	Offset is the incoming specified position for this value,
     *	fixedStart has already been converted to zero
     *	relative half open
     */
    if (variableStep)
	{
	Offset = sqlLongLong(words[0]);
	Offset = BASE_0(Offset);	/* zero relative half open */
	dataValue = sqlDouble(words[1]);
	}
    else if (fixedStep)
	{
	Offset = fixedStart + (stepSize * (validLines - 1));
	dataValue = sqlDouble(words[0]);
	}
    else if (bedData)
	{
	Offset = bedChromStart;
	dataValue = bedDataValue;
	}
    if (dataValue > overallUpperLimit) overallUpperLimit = dataValue;
    if (dataValue < overallLowerLimit) overallLowerLimit = dataValue;

    /* see if this is the first time through, establish chromStart 	*/
    if (validLines == 1)
	{
	chromStart = Offset;
	verbose(2, "first offset: %llu\n", chromStart);
	}
    else if ((validLines > 1) && (Offset <= previousOffset))
	errAbort("chrom positions not in numerical order at line %lu. previous: %llu > %llu "
                 "<-current (offset)", lineCount, BASE_1(previousOffset), BASE_1(Offset));

    /* if we are working on a zoom level and the data is not exactly
     * spaced according to the span, then we need to put each value
     * in its own row in order to keep positioning correct for these
     * data values.  The number of skipped bases has to be an even
     * multiple of dataSpan
     */
    readingFrameSlipped = FALSE;
    if ((validLines > 1) && (dataSpan > 1))
	{
	unsigned long long prevEnd = previousOffset + dataSpan;
	int skippedBases;
	int spansSkipped;
	skippedBases = Offset - previousOffset;
	if (flagOverlapSpanData && (prevEnd > Offset))
	    errAbort("ERROR: data points overlapping at input line %lu.\n"
		"previous data point position: %s:%llu-%llu overlaps current: %s:%llu-%llu",
		lineCount, chromName, BASE_1(previousOffset), prevEnd,
		chromName, BASE_1(Offset),Offset+dataSpan);
	spansSkipped = skippedBases / dataSpan;
	if ((spansSkipped * dataSpan) != skippedBases)
	    readingFrameSlipped = TRUE;
	}

    if (readingFrameSlipped)
	{
	verbose(2, "data not spanning %llu bases, prev: %llu, this: %llu, at line: %lu\n", dataSpan, previousOffset, Offset, lineCount);
	output_row();
	chromStart = Offset;	/*	a full reset here	*/
	}
    /*	Check to see if data is being skipped	*/
    else if ( (validLines > 1) && (Offset > (previousOffset + dataSpan)) )
	{
	unsigned long long off;
	unsigned long long fillSize;	/* number of bytes */

	verbose(2, "missing data offsets: %llu - %llu\n",
		BASE_1(previousOffset),BASE_0(Offset));
	/*	If we are just going to fill the rest of this bin with
	 *  no data, then may as well stop here.  No need to fill
	 *  it with nothing.
	 */
	fillSize = (Offset - (previousOffset + dataSpan)) / dataSpan;
	verbose(2, "filling NO_DATA for %llu bytes\n", fillSize);
	if (fillSize + bincount >= binsize)
	    {
	    verbose(2, "completing a bin due to  NO_DATA for %llu bytes, only %llu - %llu = %llu to go\n", fillSize, binsize, bincount, binsize - bincount);
	    verbose(2, "Offset: %llu, previousOffset: %llu\n",
		    Offset, previousOffset);
	    output_row();
	    chromStart = Offset;	/*	a full reset here	*/
	    } else {
	    fillSize = 0;
	    /*	fill missing data with NO_DATA indication	*/
	    for (off = previousOffset + dataSpan; off < Offset;
		    off += dataSpan)
		{
		++fillSize;
		++fileOffset;
		++bincount;	/*	count scores in this bin */
		if (bincount >= binsize) break;
		}
	    verbose(2, "filled NO_DATA for %llu bytes\n", fillSize);
	    /*	If that finished off this bin, output it
	     *	This most likely should not happen here.  The
	     *	check above: if (fillSize + bincount >= binsize) 
	     *	should have caught this case already.
	     */
		if (bincount >= binsize)
		    {
		    output_row();
		    chromStart = Offset;	/* a full reset here */
		    }
	    }
	}

    /*	With perhaps the missing data taken care of, back to the
     *	real data.
     */
    if (bedData)
	{
	unsigned long long bedSize = bedChromEnd - bedChromStart;
	for ( ; bedSize > 0; --bedSize )
	    {
	    setDataValue(bedDataValue);
	    Offset += 1;
	    }
	Offset -= 1;	/*	loop above increments this one too much.
			 *	This Offset is supposed to be the last
			 *	valid chrom position written, not the
			 *	next to be written */
	}
    else
	{
	setDataValue(dataValue);
	}
    previousOffset = Offset;	/* remember position for gap calculations */
    }	/*	reading file input loop end	*/

/*	Done with input file, any data points left in this bin ?	*/
if (bincount)
    output_row();

lineFileClose(&lf);
fclose(binout);
fclose(wigout);
freez(&chromName);
freez(&featureName);
freez(&data_values);
freez(&validData);
freez(&wibFileName);
/*	return limits if pointers are given	*/
if (upperLimit)
    *upperLimit = overallUpperLimit;
if (lowerLimit)
    *lowerLimit = overallLowerLimit;
if (wibSizeLimit > 0)
	options->wibSizeLimit = wibSize;
}
Esempio n. 24
0
struct edwQaWigSpot *edwQaWigSpotFromNextRa(struct lineFile *lf, struct raToStructReader *reader)
/* Parse the next ra stanza from lf into a freshly allocated edwQaWigSpot.
 * Returns NULL when raSkipLeadingEmptyLines() returns FALSE.  Tags that
 * are not found in reader->fieldIds are skipped; seeing the same known
 * tag twice in one stanza aborts.  Required-field checking is delegated
 * to raToStructReaderCheckRequiredFields() once the stanza is consumed. */
{
/* Ids looked up in reader->fieldIds are matched against these constants. */
enum fields
    {
    spotRatioField,
    enrichmentField,
    basesInGenomeField,
    basesInSpotsField,
    sumSignalField,
    spotSumSignalField,
    };
struct edwQaWigSpot *spot;
bool *seen;
char *tagName, *tagValue;

if (!raSkipLeadingEmptyLines(lf, NULL))
    return NULL;

AllocVar(spot);

/* Start every stanza with a cleared "already seen" table. */
seen = reader->fieldsObserved;
bzero(seen, reader->fieldCount);

while (raNextTagVal(lf, &tagName, &tagValue, NULL))
    {
    struct hashEl *known = hashLookup(reader->fieldIds, tagName);
    int fieldId;

    if (known == NULL)
        continue;               /* tag is not one of our fields */

    fieldId = ptToInt(known->val);
    if (seen[fieldId])
        errAbort("Duplicate tag %s line %d of %s\n", tagName, lf->lineIx, lf->fileName);
    seen[fieldId] = TRUE;

    /* Convert the text value according to the type of the target field. */
    if (fieldId == spotRatioField)
        spot->spotRatio = sqlDouble(tagValue);
    else if (fieldId == enrichmentField)
        spot->enrichment = sqlDouble(tagValue);
    else if (fieldId == basesInGenomeField)
        spot->basesInGenome = sqlLongLong(tagValue);
    else if (fieldId == basesInSpotsField)
        spot->basesInSpots = sqlLongLong(tagValue);
    else if (fieldId == sumSignalField)
        spot->sumSignal = sqlDouble(tagValue);
    else if (fieldId == spotSumSignalField)
        spot->spotSumSignal = sqlDouble(tagValue);
    else
        {
        internalErr();
        }
    }

raToStructReaderCheckRequiredFields(reader, lf);
return spot;
}
Esempio n. 25
0
struct submitFileRow *submitFileRowFromFieldedTable(
    struct sqlConnection *conn, struct fieldedTable *table,
    int fileIx, int md5Ix, int sizeIx, int modifiedIx, int replacesIx, int replaceReasonIx)
/* Turn parsed out table (still all just strings) into list of edwFiles. */
{
struct submitFileRow *sfr, *sfrList = NULL;
struct edwFile *bf;
struct fieldedRow *fr;
struct dyString *tags = dyStringNew(0);
char *ucscDbTag = "ucsc_db";
int ucscDbField = stringArrayIx(ucscDbTag, table->fields, table->fieldCount);


for (fr = table->rowList; fr != NULL; fr = fr->next)
    {
    char **row = fr->row;
    AllocVar(bf);
    bf->submitFileName = cloneString(row[fileIx]);
    safef(bf->md5, sizeof(bf->md5), "%s", row[md5Ix]);
    bf->size = sqlLongLong(row[sizeIx]);
    bf->updateTime = sqlLongLong(row[modifiedIx]);

    /* Add as tags any fields not included in fixed fields. */
    dyStringClear(tags);
    int i;
    for (i=0; i<table->fieldCount; ++i)
        {
	if (i != fileIx && i != md5Ix && i != sizeIx && i != modifiedIx)
	    {
	    cgiEncodeIntoDy(table->fields[i], row[i], tags);
	    }
	}
    if (ucscDbField < 0)
        {
	/* Try to make this field up from file name */
	char *slash = strchr(bf->submitFileName, '/');
	if (slash == NULL)
	    errAbort("Can't make up '%s' field from '%s'", ucscDbTag, bf->submitFileName);
	int len = slash - bf->submitFileName;
	char ucscDbVal[len+1];
	memcpy(ucscDbVal, bf->submitFileName, len);
	ucscDbVal[len] = 0;

	/* Do a little check on it */
	if (!sameString("mm9", ucscDbVal) && !sameString("mm10", ucscDbVal)
	    && !sameString("dm3", ucscDbVal) && !sameString("ce10", ucscDbVal)
	    && !sameString("hg19", ucscDbVal))
	    errAbort("Unrecognized ucsc_db %s - please arrange files so that the top " 
	             "level directory in the fileName in the manifest is a UCSC database name "
		     "like 'hg19' or 'mm10.'  Alternatively please include a ucsc_db column.",
		     ucscDbVal);

	/* Add it to tags. */
	cgiEncodeIntoDy(ucscDbTag, ucscDbVal, tags);
	}
    bf->tags = cloneString(tags->string);

    /* Fake other fields. */
    bf->edwFileName  = cloneString("");

    /* Allocate wrapper structure */
    AllocVar(sfr);
    sfr->file = bf;

    /* fill in fields about replacement maybe */
    if (replacesIx != -1)
        {
	char *replacesAcc = row[replacesIx];
	char *reason = row[replaceReasonIx];
	int fileId = edwFileIdForLicensePlate(conn, replacesAcc);
	if (fileId == 0)
	    errAbort("%s in %s column doesn't exist in warehouse", replacesAcc, replacesTag);
	sfr->replaces = cloneString(replacesAcc);
	sfr->replaceReason = cloneString(reason);
	sfr->replacesFile = fileId;
	}

    slAddHead(&sfrList, sfr);
    }
slReverse(&sfrList);
dyStringFree(&tags);
return sfrList;
}
Esempio n. 26
0
void dbTrash(char *db)
/* dbTrash - drop tables from a database older than specified N hours.
 *
 * Outline, as visible in this function:
 *   1. Read CT_META_INFO and split table names into expiredHash (lastUse
 *      older than ageHours) and notExpiredHash; each entry stores the
 *      lastUse time as a UNIX timestamp.
 *   2. List the tables that actually exist, either with 'show table status'
 *      (when tableStatus is set; also sums data + index sizes) or with the
 *      cheaper 'show tables'.  Existing tables found in expiredHash are put
 *      on expiredTableNames and removed from expiredHash.  Tables found in
 *      neither hash are "lost" tables and are judged against dropTime.
 *   3. Names still left in expiredHash have a metaInfo row but no table;
 *      when drop is set, ctTouchLastUse() is called for them.
 *   4. When drop is set, drop every table on expiredTableNames and record
 *      a history comment; otherwise only report what would have been done.
 *
 * Uses extFileCheck, ageHours, tableStatus, historyToo, delLostTable, drop
 * and dropTime, none of which are declared inside this function. */
{
char query[256];
struct sqlResult *sr;
char **row;
int updateTimeIx;
int createTimeIx;
int dataLengthIx;
int indexLengthIx;
int nameIx;
int timeIxUsed;
unsigned long long totalSize = 0;
// expiredTableNames: table exists and is in metaInfo and subject to age limits
struct slName *expiredTableNames = NULL;
struct slName *lostTables = NULL;	// tables existing but not in metaInfo
unsigned long long lostTableCount = 0;
struct hash *expiredHash = newHash(10); // as determined by metaInfo
struct hash *notExpiredHash = newHash(10);
struct sqlConnection *conn = sqlConnect(db);

if (extFileCheck)
    checkExtFile(conn);

/* Step 1: partition metaInfo rows by lastUse age; value is the timestamp. */
time_t ageSeconds = (time_t)(ageHours * 3600);	/*	age in seconds	*/
sqlSafef(query,sizeof(query),"select name,UNIX_TIMESTAMP(lastUse) from %s WHERE "
    "lastUse < DATE_SUB(NOW(), INTERVAL %ld SECOND);", CT_META_INFO,ageSeconds);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    hashAddInt(expiredHash, row[0], sqlSigned(row[1]));
sqlFreeResult(&sr);
sqlSafef(query,sizeof(query),"select name,UNIX_TIMESTAMP(lastUse) from %s WHERE "
    "lastUse >= DATE_SUB(NOW(), INTERVAL %ld SECOND);",CT_META_INFO,ageSeconds);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    hashAddInt(notExpiredHash, row[0], sqlSigned(row[1]));
sqlFreeResult(&sr);

/* Step 2: compare against the tables that actually exist. */
if (tableStatus)  // show table status is very expensive, use only when asked
    {
    /*	run through the table status business to get table size information */
    sqlSafef(query,sizeof(query),"show table status");
    /* STATUS_INIT and SCAN_STATUS are macros defined outside this function.
     * NOTE(review): presumably STATUS_INIT runs 'query' into 'sr' and sets
     * the *Ix column indexes, and SCAN_STATUS parses the row's time into
     * 'timep' and sets 'timeIxUsed' -- confirm against the macro bodies. */
    STATUS_INIT;
    while ((row = sqlNextRow(sr)) != NULL)
	{
	/* if not doing history too, and this is the history table, next row */
	if ((!historyToo) && (sameWord(row[nameIx],"history")))
	    continue;
	/* also skip the metaInfo table */
	if ((!historyToo) && (sameWord(row[nameIx],CT_META_INFO)))
	    continue;
	/* don't delete the extFile table  */
	if (sameWord(row[nameIx],CT_EXTFILE))
	    continue;

	SCAN_STATUS;

	if (hashLookup(expiredHash,row[nameIx]))
	    {
	    /* expired per metaInfo and the table exists: schedule the drop */
	    slNameAddHead(&expiredTableNames, row[nameIx]);
	    verbose(3,"%s %ld drop %s\n",row[timeIxUsed], (unsigned long)timep,
		    row[nameIx]);
	    /*	 If sizes are non-NULL, add them up	*/
	    if ( ((char *)NULL != row[dataLengthIx]) &&
		    ((char *)NULL != row[indexLengthIx]) )
		totalSize += sqlLongLong(row[dataLengthIx])
		    + sqlLongLong(row[indexLengthIx]);
	    /* whatever remains in expiredHash afterwards has no table */
	    hashRemove(expiredHash, row[nameIx]);
	    }
	else
	    {
	    if (hashLookup(notExpiredHash,row[nameIx]))
		verbose(3,"%s %ld OK %s\n",row[timeIxUsed], (unsigned long)timep,
		    row[nameIx]);
	    else
		{	/* table exists, but not in metaInfo, is it old enough ? */
		if (timep < dropTime)
		    {
		    slNameAddHead(&expiredTableNames, row[nameIx]);
		    verbose(2,"%s %ld dropt %s lost table\n",
			row[timeIxUsed], (unsigned long)timep, row[nameIx]);
		    /*       If sizes are non-NULL, add them up     */
		    if ( ((char *)NULL != row[dataLengthIx]) &&
			((char *)NULL != row[indexLengthIx]) )
			    totalSize += sqlLongLong(row[dataLengthIx])
				+ sqlLongLong(row[indexLengthIx]);
		    }
		else
		    verbose(3,"%s %ld OKt %s\n",row[timeIxUsed],
			(unsigned long)timep, row[nameIx]);
		}
	    }
	}
    sqlFreeResult(&sr);
    }
else
    {	// simple 'show tables' is more efficient than 'show table status'
    sqlSafef(query,sizeof(query),"show tables");
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        {
	if (hashLookup(expiredHash,row[0]))
	    {
	    slNameAddHead(&expiredTableNames, row[0]);
	    /* no table status here, so report the metaInfo lastUse time */
	    time_t lastUse = (time_t)hashIntVal(expiredHash,row[0]);
	    struct tm *lastUseTm = localtime(&lastUse);
	    verbose(3,"%4d-%02d-%02d %02d:%02d:%02d %ld drop %s\n",
		lastUseTm->tm_year+1900, lastUseTm->tm_mon+1,
		lastUseTm->tm_mday, lastUseTm->tm_hour, lastUseTm->tm_min,
		lastUseTm->tm_sec, (unsigned long)lastUse,row[0]);
	    hashRemove(expiredHash, row[0]);
	    }
	else if (hashLookup(notExpiredHash,row[0]))
	    {
	    time_t lastUse = (time_t)hashIntVal(notExpiredHash,row[0]);
	    struct tm *lastUseTm = localtime(&lastUse);
	    verbose(3,"%4d-%02d-%02d %02d:%02d:%02d %ld OK %s\n",
		lastUseTm->tm_year+1900, lastUseTm->tm_mon+1,
		lastUseTm->tm_mday, lastUseTm->tm_hour, lastUseTm->tm_min,
		lastUseTm->tm_sec, (unsigned long)lastUse,row[0]);
	    }
	else
	    {
	    /* exists but unknown to metaInfo: remember as a lost table */
	    struct slName *el = slNameNew(row[0]);
	    slAddHead(&lostTables, el);
	    }
        }
    sqlFreeResult(&sr);
    /* lostTableCount is only computed on this branch; it stays 0 when
     * tableStatus is set. */
    lostTableCount = slCount(lostTables);
    // If tables exist, but not in metaInfo, check their age to expire them.
    // It turns out even this show table status is slow too, so, only
    // run thru it if asked to eliminate lost tables.  It is better to
    // do this operation with the stand-alone perl script on the customTrash
    // database machine.
    if (delLostTable && lostTables)
	{
	struct slName *el;
	for (el = lostTables; el != NULL; el = el->next)
	    {
	    /* these three tables are never treated as lost tables here */
	    if (sameWord(el->name,"history"))
		continue;
	    if (sameWord(el->name,CT_META_INFO))
		continue;
	    if (sameWord(el->name,CT_EXTFILE))
		continue;
	    boolean oneTableOnly = FALSE; // protect against multiple tables
	    /*	get table time information to see if it is expired */
	    sqlSafef(query,sizeof(query),"show table status like '%s'", el->name);
	    /* NOTE(review): same macros as above; presumably this runs 'query'
	     * into 'sr' -- confirm against the macro definition. */
	    STATUS_INIT;

	    while ((row = sqlNextRow(sr)) != NULL)
		{
		/* abort if the 'like' pattern matched more than one table */
		if (oneTableOnly)
		    errAbort("ERROR: query: '%s' returned more than one table "
				"name\n", query);
		else
		    oneTableOnly = TRUE;
		if (differentWord(row[nameIx], el->name))
		    errAbort("ERROR: query: '%s' did not return table name '%s' != '%s'\n", query, el->name, row[nameIx]);

		SCAN_STATUS;

		if (timep < dropTime)
		    {
		    slNameAddHead(&expiredTableNames, row[nameIx]);
		    verbose(2,"%s %ld dropt %s lost table\n",
			row[timeIxUsed], (unsigned long)timep, row[nameIx]);
		    }
		else
		    verbose(3,"%s %ld OKt %s\n",
			row[timeIxUsed], (unsigned long)timep, row[nameIx]);
		}
	    sqlFreeResult(&sr);
	    }
	}
    }

/* Step 3: entries still in expiredHash were never matched to a table. */
/*	perhaps the table was already dropped, but not from the metaInfo */
struct hashEl *elList = hashElListHash(expiredHash);
struct hashEl *el;
for (el = elList; el != NULL; el = el->next)
    {
    verbose(2,"%s exists in %s only\n", el->name, CT_META_INFO);
    if (drop)
	ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */
    }

/* Step 4: either perform the drops or just report what would happen. */
if (drop)
    {
    char comment[256];
    if (expiredTableNames)
	{
	struct slName *el;	/* shadows the hashEl 'el' declared above */
	int droppedCount = 0;
	/* customTrash DB user permissions do not have permissions to
 	 * drop tables.  Must use standard special user that has all
 	 * permissions.  If we are not using the standard user at this
 	 * point, then switch to it.
	 */
	/* NOTE(review): the reconnect below uses the same sqlConnect(db) call
	 * as the top of the function; whether it yields different credentials
	 * is not visible here -- confirm. */
	if (sameWord(db,CUSTOM_TRASH))
	    {
	    sqlDisconnect(&conn);
	    conn = sqlConnect(db);
	    }
	for (el = expiredTableNames; el != NULL; el = el->next)
	    {
	    verbose(2,"# drop %s\n", el->name);
	    sqlDropTable(conn, el->name);
	    ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */
	    ++droppedCount;
	    }
	/* add a comment to the history table and finish up connection */
	if (tableStatus)
	    safef(comment, sizeof(comment), "Dropped %d tables with "
		"total size %llu, %llu lost tables",
		    droppedCount, totalSize, lostTableCount);
	else
	    safef(comment, sizeof(comment),
		"Dropped %d tables, no size info, %llu lost tables",
		    droppedCount, lostTableCount);
	verbose(2,"# %s\n", comment);
	hgHistoryComment(conn, "%s", comment);
	}
    else
	{
	/* nothing dropped: only a verbose message, no history comment */
	safef(comment, sizeof(comment),
	    "Dropped no tables, none expired, %llu lost tables",
		lostTableCount);
	verbose(2,"# %s\n", comment);
	}
    }
else
    {
    /* dry run: same summaries, worded as "Would have dropped" */
    char comment[256];
    if (expiredTableNames)
	{
	int droppedCount = slCount(expiredTableNames);
	if (tableStatus)
	    safef(comment, sizeof(comment), "Would have dropped %d tables with "
		"total size %llu, %llu lost tables",
		    droppedCount, totalSize, lostTableCount);
	else
	    safef(comment, sizeof(comment),
		"Would have dropped %d tables, no size info, %llu lost tables",
		    droppedCount, lostTableCount);
	verbose(2,"# %s\n", comment);
	}
    else
	{
	safef(comment, sizeof(comment),
	    "Would have dropped no tables, none expired, %llu lost tables",
		lostTableCount);
	verbose(2,"# %s\n", comment);
	}
    }
sqlDisconnect(&conn);
}
void dbTrash(char *db)
/* dbTrash - drop tables from a database older than specified N hours. */
{
char query[256];
struct sqlResult *sr;
char **row;
int updateTimeIx;
int createTimeIx;
int dataLengthIx;
int indexLengthIx;
int nameIx;
int timeIxUsed;
unsigned long long totalSize = 0;
struct slName *tableNames = NULL;	/*	subject to age limits	*/
struct hash *expiredHash = newHash(10);
struct hash *notExpiredHash = newHash(10);
struct sqlConnection *conn = sqlConnect(db);

if (extFileCheck)
    checkExtFile(conn);

time_t ageSeconds = (time_t)(ageHours * 3600);	/*	age in seconds	*/
safef(query,sizeof(query),"select name from %s WHERE "
    "lastUse < DATE_SUB(NOW(), INTERVAL %ld SECOND);", CT_META_INFO,ageSeconds);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    hashAddInt(expiredHash, row[0], 1);
sqlFreeResult(&sr);
safef(query,sizeof(query),"select name from %s WHERE "
    "lastUse >= DATE_SUB(NOW(), INTERVAL %ld SECOND);",CT_META_INFO,ageSeconds);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    hashAddInt(notExpiredHash, row[0], 1);
sqlFreeResult(&sr);

/*	run through the table status business to get table size information */
safef(query,sizeof(query),"show table status");
sr = sqlGetResult(conn, query);
nameIx = sqlFieldColumn(sr, "Name");
createTimeIx = sqlFieldColumn(sr, "Create_time");
updateTimeIx = sqlFieldColumn(sr, "Update_time");
dataLengthIx = sqlFieldColumn(sr, "Data_length");
indexLengthIx = sqlFieldColumn(sr, "Index_length");
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct tm tm;
    time_t timep = 0;

    /* if not doing history too, and this is the history table, next row */
    if ((!historyToo) && (sameWord(row[nameIx],"history")))
	continue;
    /* also skip the metaInfo table */
    if ((!historyToo) && (sameWord(row[nameIx],CT_META_INFO)))
	continue;
    /* don't delete the extFile table  */
    if (sameWord(row[nameIx],CT_EXTFILE))
	continue;

    /*	Update_time is sometimes NULL on MySQL 5
     *	so if it fails, then check the Create_time
     */
    timeIxUsed = updateTimeIx;
    if ((row[updateTimeIx] != NULL) &&
	    (sscanf(row[updateTimeIx], "%4d-%2d-%2d %2d:%2d:%2d",
		&(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday),
		    &(tm.tm_hour), &(tm.tm_min), &(tm.tm_sec)) != 6) )
	{
	timeIxUsed = createTimeIx;
	if (sscanf(row[createTimeIx], "%4d-%2d-%2d %2d:%2d:%2d",
	    &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday),
		&(tm.tm_hour), &(tm.tm_min), &(tm.tm_sec)) != 6)
	    {
	    verbose(2,"%s %s %s\n",
		row[createTimeIx],row[updateTimeIx],row[nameIx]);
	    errAbort("could not parse date %s or %s on table %s\n",
		row[createTimeIx], row[updateTimeIx], row[nameIx]);
	    }
	}
    tm.tm_year -= 1900;
    tm.tm_mon -= 1;
    tm.tm_isdst = -1;   /*      do not know timezone, figure it out */
    timep = mktime(&tm);

    if (hashLookup(expiredHash,row[nameIx]))
	{
	slNameAddHead(&tableNames, row[nameIx]);
	verbose(3,"%s %ld drop %s\n",row[timeIxUsed], (unsigned long)timep,
		row[nameIx]);
	/*	 If sizes are non-NULL, add them up	*/
	if ( ((char *)NULL != row[dataLengthIx]) &&
		((char *)NULL != row[indexLengthIx]) )
	    totalSize += sqlLongLong(row[dataLengthIx])
		+ sqlLongLong(row[indexLengthIx]);
	hashRemove(expiredHash, row[nameIx]);
	}
    else
	{
	if (hashLookup(notExpiredHash,row[nameIx]))
	    verbose(3,"%s %ld   OK %s\n",row[timeIxUsed], (unsigned long)timep,
		row[nameIx]);
	else
	    {	/* table exists, but not in metaInfo, is it old enough ? */
	    if (timep < dropTime)
		{
		slNameAddHead(&tableNames, row[nameIx]);
		verbose(2,"%s %ld dropt %s\n",
		    row[timeIxUsed], (unsigned long)timep, row[nameIx]);
		/*       If sizes are non-NULL, add them up     */
		if ( ((char *)NULL != row[dataLengthIx]) &&
		    ((char *)NULL != row[indexLengthIx]) )
			totalSize += sqlLongLong(row[dataLengthIx])
			    + sqlLongLong(row[indexLengthIx]);
		}
	    else
		verbose(3,"%s %ld  OKt %s\n",row[timeIxUsed],
		    (unsigned long)timep, row[nameIx]);
	    }
	}
    }
sqlFreeResult(&sr);

/*	perhaps the table was already dropped, but not from the metaInfo */
struct hashEl *elList = hashElListHash(expiredHash);
struct hashEl *el;
for (el = elList; el != NULL; el = el->next)
    {
    verbose(2,"%s exists in %s only\n", el->name, CT_META_INFO);
    if (drop)
	ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */
    }

if (drop)
    {
    if (tableNames)
	{
	char comment[256];
	struct slName *el;
	int droppedCount = 0;
	/* customTrash DB user permissions do not have permissions to
 	 * drop tables.  Must use standard special user that has all
 	 * permissions.  If we are not using the standard user at this
 	 * point, then switch to it.
	 */
	if (sameWord(db,CUSTOM_TRASH))
	    {
	    sqlDisconnect(&conn);
	    conn = sqlConnect(db);
	    }
	for (el = tableNames; el != NULL; el = el->next)
	    {
	    verbose(2,"# drop %s\n", el->name);
	    sqlDropTable(conn, el->name);
	    ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */
	    ++droppedCount;
	    }
	/* add a comment to the history table and finish up connection */
	safef(comment, sizeof(comment),
	    "Dropped %d tables with total size %llu", droppedCount, totalSize);
	verbose(2,"# %s\n", comment);
	hgHistoryComment(conn, comment);
	}
    }
sqlDisconnect(&conn);
}