Exemplo n.º 1
0
struct edwFile *edwFileFromId(struct sqlConnection *conn, long long fileId)
/* Load the edwFile record whose id equals fileId.  The result is whatever
 * edwFileLoadByQuery produces for that query - presumably NULL when no row
 * matches. */
{
char sql[128];
sqlSafef(sql, sizeof(sql), "select * from edwFile where id=%lld", fileId);
struct edwFile *ef = edwFileLoadByQuery(conn, sql);
return ef;
}
struct edwFile *edwFileLoadIdRange(struct sqlConnection *conn, long long startId, long long endId)
/* Load the edwFile records with startId <= id <= endId that have nonzero
 * endUploadTime, nonzero updateTime, and an empty deprecated field. */
{
char sql[256];
sqlSafef(sql, sizeof(sql),
    "select * from edwFile where id>=%lld and id<=%lld "
    "and endUploadTime != 0 and updateTime != 0 "
    "and deprecated = ''",
    startId, endId);
struct edwFile *fileList = edwFileLoadByQuery(conn, sql);
return fileList;
}
Exemplo n.º 3
0
void edwFixRevoked(char *database, char *inFile)
/* edwFixRevoked - Mark as deprecated files that are revoked in ENCODE2.
 * The database is only read here; the update statements are printed to stdout.
 * inFile is in format:
 *    metaVariable objStatus revoked [- reason]
 *    metaObject name
 * A metaVariable line sets the reason that is applied to the metaObject
 * lines following it. */
{
char *fallbackReason = "Revoked in ENCODE2";
char *revokedTag = "metaVariable objStatus revoked";
struct sqlConnection *conn = edwConnect();
struct lineFile *lf = lineFileOpen(inFile, TRUE);
char *curReason = fallbackReason;
char *line;
while (lineFileNextReal(lf, &line))
    {
    if (startsWithWord("metaVariable", line))
        {
        /* Only the "objStatus revoked" variable is understood. */
        if (!startsWithWord(revokedTag, line))
            errAbort("??? %s\n", line);
        else
            {
            char *rest = skipLeadingSpaces(line + strlen(revokedTag));
            if (isEmpty(rest))
                curReason = fallbackReason;
            else
                {
                /* Drop the optional '-' separator in front of the reason text. */
                if (rest[0] == '-')
                    rest = skipLeadingSpaces(rest + 1);
                /* Clone, since line is handed out by the lineFile reader. */
                curReason = cloneString(rest);
                }
            }
        }
    else if (startsWithWord("metaObject", line))
        {
        char *words[3];
        int nWords = chopLine(line, words);
        if (nWords != 2)
            errAbort("Strange metaobject line %d of %s\n", lf->lineIx, lf->fileName);
        char *objName = words[1];
        if (!startsWith("wgEncode", objName))
            errAbort("Strange object line %d of %s\n", lf->lineIx, lf->fileName);

        /* Find every file under database whose name starts with the object name. */
        char sql[512];
        sqlSafef(sql, sizeof(sql),
            "select * from edwFile where submitFileName like '%s/%%/%s%%'", database, objName);
        struct edwFile *ef = edwFileLoadByQuery(conn, sql);
        printf("# %s %s\n", objName, curReason);
        while (ef != NULL)
            {
            long long fileId = ef->id;
            printf("update edwFile set deprecated='%s' where id=%lld;\n", curReason, fileId);
            ef = ef->next;
            }
        }
    else
        errAbort("Unrecognized first word in %s\n", line);
    }
}
Exemplo n.º 4
0
struct edwFile *edwFileInProgress(struct sqlConnection *conn, int submitId)
/* Return the file the given submission is currently uploading, or NULL when
 * the submission's fileIdInTransit is zero. */
{
char sql[256];
struct edwFile *inTransit = NULL;
sqlSafef(sql, sizeof(sql), "select fileIdInTransit from edwSubmit where id=%u", submitId);
long long inTransitId = sqlQuickLongLong(conn, sql);
if (inTransitId != 0)
    {
    sqlSafef(sql, sizeof(sql), "select * from edwFile where id=%lld", inTransitId);
    inTransit = edwFileLoadByQuery(conn, sql);
    }
return inTransit;
}
Exemplo n.º 5
0
struct edwFile *edwFileAllIntactBetween(struct sqlConnection *conn, int startId, int endId)
/* Return list of files with startId <= id <= endId (both ends included) that
 * are intact: nonzero endUploadTime and updateTime, empty errorMessage, and
 * empty deprecated field. */
{
char sql[256];
sqlSafef(sql, sizeof(sql),
    "select * from edwFile where id>=%d and id<=%d "
    "and endUploadTime != 0 and updateTime != 0 "
    "and errorMessage = '' and deprecated = ''",
    startId, endId);
struct edwFile *intactList = edwFileLoadByQuery(conn, sql);
return intactList;
}
Exemplo n.º 6
0
void edwFixGtfBigBed(char *how)
/* edwFixGtfBigBed - In original import the .gtf.bigBed files were bad about half the time.
 * Cricket caught this because a bunch of them ended up with the same md5 sum.  This program
 * regenerates them all.  The doReal flag is set only when how is exactly "real". */
{
struct sqlConnection *conn;
struct edwFile *ef;

doReal = sameString(how, "real");
conn = edwConnectReadWrite();

/* Walk every record whose submitted name ends in .gtf.bigBed and redo it. */
ef = edwFileLoadByQuery(conn, "select * from edwFile where submitFileName like '%.gtf.bigBed'");
while (ef != NULL)
    {
    redoOne(conn, ef);
    ef = ef->next;
    }
}
Exemplo n.º 7
0
void edwMakeReplicateQa(int startId, int endId)
/* edwMakeReplicateQa - Do qa level comparisons of replicates for every
 * uploaded, non-deprecated file with startId <= id <= endId. */
{
struct sqlConnection *conn = sqlConnect(edwDatabase);

/* Select files in the ID range that finished uploading and are not deprecated. */
char sql[256];
sqlSafef(sql, sizeof(sql),
    "select * from edwFile where id>=%d and id<=%d "
    "and endUploadTime != 0 and updateTime != 0 "
    "and deprecated = ''",
    startId, endId);

struct edwFile *cur = edwFileLoadByQuery(conn, sql);
while (cur != NULL)
    {
    doReplicateQa(conn, cur);
    cur = cur->next;
    }
}
Exemplo n.º 8
0
void redoOne(struct sqlConnection *conn, struct edwFile *redoEf)
/* Redo one file.  redoEf is the edwFile record of a .gtf.bigBed file.  The
 * matching source gtf record is looked up, the bigBed is rebuilt over
 * redoEf's warehouse file, its md5 is updated via hgsql, and - only when
 * doReal is set - the valid_key tag is reset. */
{
/* Figure out submit file name of the gtf file: drop the last suffix of the
 * bigBed's submit name and add .gz.  The copies go through size-checked safef
 * rather than strcpy/strcat, so an over-long submitFileName cannot run past
 * the PATH_LEN buffers (kent's safef is expected to abort on overflow). */
char gtfRoot[PATH_LEN], gtfFileName[PATH_LEN];
safef(gtfRoot, sizeof(gtfRoot), "%s", redoEf->submitFileName);
chopSuffix(gtfRoot);
safef(gtfFileName, sizeof(gtfFileName), "%s.gz", gtfRoot);

/* Get edwFile record for gtf file.
 * NOTE(review): gtfFileName is pasted into the SQL with plain safef, so it is
 * not escaped; other code in this area uses sqlSafef - consider switching. */
char query[PATH_LEN+64];
safef(query, sizeof(query), "select * from edwFile where submitFileName='%s'", gtfFileName);
struct edwFile *sourceEf = edwFileLoadByQuery(conn, query);
/* NOTE(review): this check disappears under NDEBUG, after which a missing
 * source record would be dereferenced below. */
assert(slCount(sourceEf) == 1);

/* Get UCSC database */
safef(query, sizeof(query), "select ucscDb from edwValidFile where fileId=%u", sourceEf->id);
char ucscDb[64] = "";
sqlQuickQuery(conn, query, ucscDb, sizeof(ucscDb));
assert(ucscDb[0] != 0);

/* Remake the big bed file. */
char sourceFileName[PATH_LEN], destFileName[PATH_LEN];
safef(sourceFileName, sizeof(sourceFileName), "%s%s", edwRootDir, sourceEf->edwFileName);
safef(destFileName, sizeof(destFileName), "%s%s", edwRootDir, redoEf->edwFileName);
makeGtfBigBed(ucscDb, sourceFileName, destFileName);

/* Recalculate size and md5 sum and validation key. */
char *md5 = md5HexForFile(destFileName);
long long size = fileSize(destFileName);
char *validKey = encode3CalcValidationKey(md5, size);

/* Issue command to update md5 in database.
 * NOTE(review): this, like the bigBed rebuild above, runs even when doReal is
 * not set; only the tag update below is gated.  Confirm that is intended. */
char command[2*PATH_LEN];
safef(command, sizeof(command),
    "hgsql -e 'update edwFile set md5=\"%s\" where id=%u' encodeDataWarehouse", md5, redoEf->id);
doSystem(command);

/* Issue command to update tags in database. */
char *newTags = cgiStringNewValForVar(redoEf->tags, "valid_key", validKey);
if (doReal)
    {
    edwFileResetTags(conn, redoEf, newTags);
    }
}
Exemplo n.º 9
0
void edwScriptSubmitStatus()
/* edwScriptSubmitStatus - Programatically check status of submission.. */
{
/* Pause a second - prevent inadvertent harsh denial of service from scripts. */
sleep(2);

edwScriptRegistryFromCgi();

/* Get submission from url. */
struct sqlConnection *conn = edwConnect();
char query[512];
char *url = cgiString("url");
struct edwSubmit *sub = edwMostRecentSubmission(conn, url);
char *status = NULL;
if (sub == NULL)
    {
    int posInQueue = edwSubmitPositionInQueue(conn, url, NULL);
    if (posInQueue == -1)
         errAbort("%s has not been submitted", url);
    else
         status = "pending";
    }
else
    {
    time_t endUploadTime = sub->endUploadTime;
    if (!isEmpty(sub->errorMessage))
        {
	status = "error";
	}
    else if (endUploadTime == 0)  
	{
	status = "uploading";
	}
    else
        {
	safef(query, sizeof(query), 
	    "select count(*) from edwFile where submitId=%u and errorMessage != ''",
	    sub->id);
	int errCount = sqlQuickNum(conn, query);
	int newValid = edwSubmitCountNewValid(sub, conn);
	if (newValid + errCount < sub->newFiles)
	    status = "validating";
	else if (errCount > 0)
	    status = "error";
	else
	    status = "success";
	}
    }

/* Construct JSON result */
struct dyString *dy = dyStringNew(0);
dyStringPrintf(dy, "{\n");
dyStringPrintf(dy, "    \"status\": \"%s\"", status);
if (sameString(status, "error"))
    {
    dyStringPrintf(dy, ",\n");
    dyStringPrintf(dy, "    \"errors\": [\n");
    int errCount = 0;
    if (!isEmpty(sub->errorMessage))
        {
	addErrFile(dy, errCount, sub->url, sub->errorMessage);
	++errCount;
	}
    safef(query, sizeof(query), "select * from edwFile where submitId=%u and errorMessage != ''",
	sub->id);
    struct edwFile *file, *fileList = edwFileLoadByQuery(conn, query);
    for (file = fileList; file != NULL; file = file->next)
        {
	addErrFile(dy, errCount, file->submitFileName, file->errorMessage);
	++errCount;
	}
    dyStringPrintf(dy, "\n    ]\n");
    dyStringPrintf(dy, "}\n");
    }
else
    {
    dyStringPrintf(dy, "\n}\n");
    }

/* Write out HTTP response */
printf("Content-Length: %d\r\n", dy->stringSize);
puts("Content-Type: application/json; charset=UTF-8\r");
puts("\r");
printf("%s", dy->string);
}
Exemplo n.º 10
0
void edwFixReplaced(char *database, char *inTab, char *spikedTab, char *outSql, char *outRa)
/* edwFixReplaced - Clean up files that were replaced in ENCODE2.
 * The database is only queried here; the fixes are written out as text:
 *    database  - leading path component used in the submitFileName LIKE patterns
 *    inTab     - input rows of two fields: old file name, objStatus
 *    spikedTab - two column file loaded into a hash; when an old file name is a key
 *                its value is used in place of the old name
 *    outSql    - receives "update edwFile set deprecated=..." and
 *                "update edwFile set replacedBy=..." statements
 *    outRa     - receives one objStatus/oldFile/newFile stanza per deprecated file
 * Aborts on unexpected input or when expected records are missing. */
{
struct sqlConnection *conn = edwConnect();
struct lineFile *lf = lineFileOpen(inTab, TRUE);
FILE *fSql = mustOpen(outSql, "w");
FILE *fRa = mustOpen(outRa, "w");
char *row[2];
struct hash *renameHash = rootRenameHash();
struct hash *spikedHash = hashTwoColumnFile(spikedTab);
int depCount = 0, repCount = 0;   /* Number of files deprecated / given a replacement. */
while (lineFileRowTab(lf, row))
    {
    /* Get fields in local variables. */
    char *oldFileName = row[0];
    char *objStatus = row[1];

    /* Do spikein rename lookup. */
    char *spiked = hashFindVal(spikedHash, oldFileName);
    if (spiked != NULL)
	{
	verbose(2, "renaming spikeing %s to %s\n", oldFileName, spiked);
        oldFileName = spiked;
	}

    /* Get rid of bai name for bam,bai pairs.  The name is truncated in place at the comma. */
    char *comma = strchr(oldFileName, ',');
    if (comma != NULL)
        {
	if (!endsWith(comma, ".bai"))
	    errAbort("Unexpected conjoining of files line %d of %s", lf->lineIx, lf->fileName);
	*comma = 0;
	}

    /* For .fastq.tgz files we got to unpack them: each file inside the tgz has its
     * own record, so old and new records are matched up by file name. */
    if (endsWith(oldFileName, ".fastq.tgz"))
	{
	/* Get root name - name minus suffix.  Two chops remove .tgz and then .fastq. */
	char *oldRoot = cloneString(oldFileName);
	chopSuffix(oldRoot);
	chopSuffix(oldRoot);
	verbose(2, "Processing fastq.tgz %s %s\n", oldFileName, oldRoot);

	// Find records for old version.
	char query[512];
	sqlSafef(query, sizeof(query), 
	    "select * from edwFile where submitFileName like '%s/%%/%s.fastq.tgz.dir/%%'"
	    " order by submitFileName",
	    database, oldRoot);
	struct edwFile *oldList = edwFileLoadByQuery(conn, query);
	int oldCount = slCount(oldList);
	if (oldCount == 0)
	    errAbort("No records match %s", query);


	// Find record for replaced version.
	// Fortunately all of the fastq.tgz's are just V2, which simplifies code a bit
	sqlSafef(query, sizeof(query), 
	    "select * from edwFile where submitFileName like '%s/%%/%sV2.fastq.tgz.dir/%%'"
	    " order by submitFileName",
	    database, oldRoot);
	struct edwFile *newList = edwFileLoadByQuery(conn, query);
	int newCount = slCount(newList);
	if (newCount == 0)
	    errAbort("No records match %s", query);

	// Make a hash of new records keyed by new file name inside of tgz
	struct edwFile *newEf;
	struct hash *newHash = hashNew(0);
	for (newEf = newList; newEf != NULL; newEf = newEf->next)
	    {
	    char fileName[FILENAME_LEN];
	    splitPath(newEf->submitFileName, NULL, fileName, NULL);
	    hashAdd(newHash, fileName, newEf);
	    verbose(2, " %s\n", fileName);
	    }
	verbose(2, "%d in oldList, %d in newList\n", oldCount, newCount);

	// Loop through old records trying to find corresponding new record.
	// Every old record is deprecated; replacedBy is only written when a match exists.
	struct edwFile *oldEf;
	for (oldEf = oldList; oldEf != NULL; oldEf = oldEf->next)
	    {
	    char fileName[FILENAME_LEN];
	    splitPath(oldEf->submitFileName, NULL, fileName, NULL);
	    /* This newEf shadows the one declared above for the hash-building loop. */
	    struct edwFile *newEf = hashFindVal(newHash, fileName);
	    char *newName = "n/a";
	    fprintf(fSql, "update edwFile set deprecated='%s' where id=%u;\n", objStatus, oldEf->id);
	    ++depCount;
	    if (newEf != NULL)
	        {
		fprintf(fSql, "update edwFile set replacedBy=%u where id=%u;\n", newEf->id, oldEf->id);
		newName = newEf->submitFileName;
		++repCount;
		}
	    fprintf(fRa, "objStatus %s\n", objStatus);
	    fprintf(fRa, "oldFile %s\n", oldEf->submitFileName);
	    fprintf(fRa, "newFile %s\n", newName);
	    fprintf(fRa, "\n");
	    verbose(2, "%s -> %s\n", oldEf->submitFileName, newName);
	    }
	}
    else
	{

	/* Figure out new file name by either adding V2 at end, or if there is already a V#,
	 * replacing it. */
#ifdef SOON
#endif /* SOON */
	int oldVersion = 1;      /* Version assumed when the name carries no V# */
	char *noVersion = NULL;  /* Root of the name with any V# chopped off */
	    {
	    /* Split old file name into root and suffix. */
	    char *suffix = edwFindDoubleFileSuffix(oldFileName);
	    if (suffix == NULL)
		errAbort("No suffix in %s line %d of %s", oldFileName, lf->lineIx, lf->fileName);
	    char *oldRoot = cloneStringZ(oldFileName, suffix - oldFileName);
	    char *renamed = hashFindVal(renameHash, oldRoot);
	    if (renamed != NULL)
		{
		verbose(2, "Overriding %s with %s\n", oldRoot, renamed);
		oldRoot = cloneString(renamed);
		}


	    /* Look for V# at end of old root, and if it's there chop it off and update oldVersion.
	     * Only a last 'V' followed by exactly one or two characters is treated as a version;
	     * those characters must then all be digits or the program aborts. */
	    noVersion = oldRoot;  // If there is no V# we are done.
	    char *vPos = strrchr(oldRoot, 'V');
	    if (vPos != NULL)
		{
		char *numPos = vPos + 1;
		int numSize = strlen(numPos);
		if (numSize == 1 || numSize == 2)
		    {
		    if (isAllDigits(numPos))
			{
			oldVersion = atoi(numPos);
			/* Truncates oldRoot, and so noVersion, at the V. */
			*vPos = 0;
			}
		    else
			errAbort("Expecting numbers after V in file name got %s line %d of %s",
			    numPos, lf->lineIx, lf->fileName);
		    }
		}
	    verbose(2, "%s parses to  %s %d %s\n", oldFileName, noVersion, oldVersion, suffix);

	    /* Find record for old file.  Exactly one match is required. */
	    char query[512];
	    sqlSafef(query, sizeof(query), 
		"select * from edwFile where submitFileName like '%s/%%/%s'", 
		database, oldFileName);
	    struct edwFile *oldEf = edwFileLoadByQuery(conn, query);
	    if (slCount(oldEf) != 1)
		errAbort("Expecting one result got %d for %s\n", slCount(oldEf), query);
	    fprintf(fSql, "# %s %s\n", oldFileName, objStatus);
	    verbose(2, "%s: %s\n", oldFileName, objStatus);

	    /* Find record for new file.  Versions after the old one are tried in
	     * increasing order up to V6, and the first one with a record wins. */
	    struct edwFile *newEf = NULL;
	    int newVersion;
	    for (newVersion = oldVersion+1; newVersion < 7; ++newVersion)
		{
		sqlSafef(query, sizeof(query), 
		    "select * from edwFile where submitFileName like '%s/%%/%sV%d%s'",
		    database, noVersion, newVersion, suffix); 
		newEf = edwFileLoadByQuery(conn, query);
		if (newEf != NULL)
		    break;
		}
	    /* A missing newer version is not fatal; the old file is still deprecated below.
	     * NOTE(review): unlike the other verbose calls this message has no trailing newline. */
	    if (newEf == NULL)
		verbose(2, "Could not find next version of %s (%s)", oldFileName, oldRoot);
	    if (slCount(newEf) > 1)
		errAbort("Expecting one result got %d for %s\n", slCount(newEf), query);

	    long long oldId = oldEf->id;
	    fprintf(fSql, "update edwFile set deprecated='%s' where id=%lld;\n", objStatus, oldId);
	    ++depCount;
	    char *newName = "n/a";
	    if (newEf != NULL)
		{
		long long newId = newEf->id;
		fprintf(fSql, "update edwFile set replacedBy=%lld where id=%lld;\n", newId, oldId);
		newName = newEf->submitFileName;
		++repCount;
		}
	    fprintf(fRa, "objStatus %s\n", objStatus);
	    fprintf(fRa, "oldFile %s\n", oldEf->submitFileName);
	    fprintf(fRa, "newFile %s\n", newName);
	    fprintf(fRa, "\n");
	    verbose(2, "%s -> %s\n", oldEf->submitFileName, newName);
	    }
	}
    }
/* NOTE(review): lf and conn are not closed in this function. */
verbose(1, "%d deprecated, %d replaced\n", depCount, repCount);
carefulClose(&fSql);
carefulClose(&fRa);
}
Exemplo n.º 11
0
struct edwFile *edwGetLocalFile(struct sqlConnection *conn, char *localAbsolutePath, 
    char *symLinkMd5Sum)
/* Get record of local file from database, adding it if it doesn't already exist.
 * localAbsolutePath must start with '/' and name an existing file, else this aborts.
 * Can make it a symLink rather than a copy in which case pass in valid MD5 sum
 * for symLinkMd5Sum; pass NULL to copy the file and compute the MD5 sum here.
 * If a record for the path exists with the same size and modification time it is
 * returned as is; otherwise a new record is inserted and returned. */
{
/* First do a reality check on the local absolute path.  Is there a file there? */
if (localAbsolutePath[0] != '/')
    errAbort("Using relative path '%s' in edwGetLocalFile.", localAbsolutePath);
long long size = fileSize(localAbsolutePath);
if (size == -1)
    errAbort("%s does not exist", localAbsolutePath);
long long updateTime = fileModTime(localAbsolutePath);

/* Get file if it's in database already. */
int submitDirId = getLocalSubmitDir(conn);
int submitId = getLocalSubmit(conn);
char query[256+PATH_LEN];
sqlSafef(query, sizeof(query), "select * from edwFile where submitId=%d and submitFileName='%s'",
    submitId, localAbsolutePath);
struct edwFile *ef = edwFileLoadByQuery(conn, query);

/* If we got something in database, check update time and size, and if it's no change just 
 * return existing database record. */
if (ef != NULL && ef->updateTime == updateTime && ef->size == size)
    return ef;

/* If we got here, then we need to make a new file record. Start with pretty empty record
 * that just has file ID, submitted file name and a few things. */
sqlSafef(query, sizeof(query), 
    "insert edwFile (submitId,submitDirId,submitFileName,startUploadTime) "
            " values(%d, %d, '%s', %lld)"
	    , submitId, submitDirId, localAbsolutePath, edwNow());
sqlUpdate(conn, query);
long long fileId = sqlLastAutoId(conn);

/* Create big data warehouse file/path name. */
char edwFile[PATH_LEN], edwPath[PATH_LEN];
edwMakeFileNameAndPath(fileId, localAbsolutePath, edwFile, edwPath);

/* MD5 sum to store: either the caller's (symlink case) or one computed below. */
char *md5;

/* Do copy or symbolic linking of file into warehouse managed dir. */
if (symLinkMd5Sum)
    {
    /* Trust the caller's MD5 sum and just link to the original file. */
    md5 = symLinkMd5Sum;
    makeSymLink(localAbsolutePath, edwPath);  
    }
else
    {
    /* We're a little paranoid so md5 it.  The sum is taken from the original
     * file, not from the copy. */
    copyFile(localAbsolutePath, edwPath);
    md5 = md5HexForFile(localAbsolutePath);
    }

/* Update file record. */
sqlSafef(query, sizeof(query), 
    "update edwFile set edwFileName='%s', endUploadTime=%lld,"
                       "updateTime=%lld, size=%lld, md5='%s' where id=%lld"
			, edwFile, edwNow(), updateTime, size, md5, fileId);
sqlUpdate(conn, query);

/* Now, it's a bit of a time waste, but cheap in code, to just load it back from DB. */
sqlSafef(query, sizeof(query), "select * from edwFile where id=%lld", fileId);
return edwFileLoadByQuery(conn, query);
}