Ejemplo n.º 1
0
void eapFixSortedBams(char *outSql)
/* eapFixSortedBams - Help fix early eap run that left bams sorted by read rather than
 * by chromosome.  Looks at every edwAnalysisRun row that has a positive createStatus and
 * exactly one created file.  A file belonging to badSubmit that is not sorted by target
 * is handed to hookupFix along with the output file opened on outSql.  It is a fatal error
 * if a badSubmit file is already sorted, or if a goodSubmit file is not sorted. */
{
FILE *f = mustOpen(outSql, "w");
struct sqlConnection *conn = edwConnect();
char query[512];
sqlSafef(query, sizeof(query), "select * from edwAnalysisRun");
struct edwAnalysisRun *runList = edwAnalysisRunLoadByQuery(conn, query);
struct edwAnalysisRun *run;
for (run = runList; run != NULL; run = run->next)
    {
    /* Only runs that succeeded in creating a single file are of interest. */
    if (run->createStatus <= 0 || run->createCount != 1)
        continue;

    struct edwFile *create = getFilesFromIds(conn, run->createFileIds, run->createCount);

    /* Files from any submission other than the two known ones are left alone. */
    int isBad = (create->submitId == badSubmit);
    if (!isBad && create->submitId != goodSubmit)
        continue;

    /* Check the first 1000 records of the bam for target ordering. */
    char *fileName = edwPathForFileId(conn, create->id);
    int isSorted = bamIsSortedByTarget(fileName, 1000);
    if (isBad)
        {
        if (isSorted)
            errAbort("Looks like bad %s is already sorted\n", fileName);
        else
            hookupFix(conn, run, create, f);
        }
    else if (!isSorted)
        errAbort("Looks like good %s needs sorting\n", fileName);
    }
carefulClose(&f);
}
Ejemplo n.º 2
0
void edwFixRevoked(char *database, char *inFile)
/* edwFixRevoked - Mark as deprecated files that are revoked in ENCODE2.
 * Prints SQL update statements to stdout rather than changing the database.
 * inFile is in format:
 *    metaVariable objStatus revoked [- reason]
 *    metaObject name
 * A metaVariable line sets the reason used for the metaObject lines that follow it.
 * Each metaObject name must start with "wgEncode" and is used as a file name prefix
 * when looking up edwFile records under the given database. */
{
struct sqlConnection *conn = edwConnect();
struct lineFile *lf = lineFileOpen(inFile, TRUE);
char *fallbackReason = "Revoked in ENCODE2";
char *revokedPrefix = "metaVariable objStatus revoked";
char *reason = fallbackReason;
char *line;
while (lineFileNextReal(lf, &line))
    {
    if (startsWithWord("metaVariable", line))
        {
        if (!startsWithWord(revokedPrefix, line))
            errAbort("??? %s\n", line);
        else
            {
            /* Whatever follows the fixed prefix, minus an optional leading dash,
             * is the reason.  Nothing there means go back to the default. */
            char *rest = skipLeadingSpaces(line + strlen(revokedPrefix));
            if (isEmpty(rest))
                reason = fallbackReason;
            else
                {
                if (rest[0] == '-')
                    rest = skipLeadingSpaces(rest + 1);
                /* Line buffer gets reused, so keep our own copy. */
                reason = cloneString(rest);
                }
            }
        }
    else if (startsWithWord("metaObject", line))
        {
        char *row[3];
        int wordCount = chopLine(line, row);
        if (wordCount != 2)
            errAbort("Strange metaobject line %d of %s\n", lf->lineIx, lf->fileName);
        char *prefix = row[1];
        if (!startsWith("wgEncode", prefix))
            errAbort("Strange object line %d of %s\n", lf->lineIx, lf->fileName);

        /* Fetch every file whose submitted name is database/<anything>/prefix<anything> */
        char query[512];
        sqlSafef(query, sizeof(query), 
            "select * from edwFile where submitFileName like '%s/%%/%s%%'", database, prefix);
        struct edwFile *efList = edwFileLoadByQuery(conn, query);

        /* Emit a comment line followed by one update per matching file. */
        printf("# %s %s\n", prefix, reason);
        struct edwFile *ef = efList;
        while (ef != NULL)
            {
            printf("update edwFile set deprecated='%s' where id=%lld;\n", reason,
                (long long)ef->id);
            ef = ef->next;
            }
        }
    else
        errAbort("Unrecognized first word in %s\n", line);
    }
}
Ejemplo n.º 3
0
void doValidatedEmail(struct edwSubmit *submit, boolean isComplete)
/* Send an email to the submitting user with info on all validated files.
 * If the submission contained only previously submitted files the email just says
 * validation was skipped.  Otherwise it starts with a line saying whether validation
 * completed (isComplete) and then lists accession and submitted name for each file in
 * the submission other than the submit file itself. */
{
struct sqlConnection *conn = edwConnect();
struct edwUser *user = edwUserFromId(conn, submit->userId);
struct dyString *message = dyStringNew(0);

/* True when the submission has old files but nothing new, no metadata changes,
 * no error, and nothing in transit. */
boolean nothingNew = (submit->oldFiles != 0) && (submit->newFiles == 0) &&
    (submit->metaChangeCount == 0) && isEmpty(submit->errorMessage) &&
    (submit->fileIdInTransit == 0);

if (nothingNew)
    {
    dyStringPrintf(message, "Your submission from %s is completed, but validation was not performed for this submission since all files in validate.txt have been previously submitted and validated.\n", submit->url);
    }
else
    {
    /* Opening line depends on whether we finished or timed out. */
    if (!isComplete)
        dyStringPrintf(message, 
            "Your submission hasn't validated after 24 hours, something is probably wrong\n"
            "at %s\n", submit->url);
    else
        dyStringPrintf(message, "Your submission from %s is completely validated\n", submit->url);
    dyStringPrintf(message, "\n#accession\tsubmitted_file_name\tnotes\n");

    /* One output row per file.  Left join so files lacking an edwValidFile record
     * still show up, with a NULL licensePlate. */
    char query[512];
    sqlSafef(query, sizeof(query),
        "select licensePlate,submitFileName "
        " from edwFile left join edwValidFile on edwFile.id = edwValidFile.fileId "
        " where edwFile.submitId = %u and edwFile.id != %u"
        , submit->id, submit->submitFileId);
    struct sqlResult *sr = sqlGetResult(conn, query);
    char **row;
    for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
        {
        char *licensePlate = row[0];
        char *submitFileName = row[1];
        dyStringPrintf(message, "%s\t%s\t", naForNull(licensePlate), submitFileName);
        if (licensePlate == NULL)
            dyStringPrintf(message, "Not validating");
        dyStringPrintf(message, "\n");
        }
    sqlFreeResult(&sr);
    }

/* Common tail: send what we built and clean up. */
mailViaPipe(user->email, "EDW Validation Results", message->string, edwDaemonEmail);
sqlDisconnect(&conn);
dyStringFree(&message);
}
Ejemplo n.º 4
0
struct edwScriptRegistry *edwScriptRegistryFromCgi()
/* Get script registry from cgi variables.  Does authentication too.
 * Reads the "user" and "password" cgi variables, loads the edwScriptRegistry row
 * whose name matches user, and compares its secretHash against the sid made from
 * the password.  Calls accessDenied() if there is no such row or the hash differs.
 * Returns the loaded registry record. */
{
struct sqlConnection *conn = edwConnect();

/* sqlSafef escapes its %s arguments itself, so the raw cgi value goes in directly.
 * Escaping it beforehand as well would double-escape, so that names containing
 * quotes or backslashes could never match, and would leak the escaped copy. */
char *user = cgiString("user");

/* NOTE(review): the password is still escaped before hashing, as before.  Stored
 * hashes may have been made from the escaped form - confirm before changing. */
char *password = sqlEscapeString(cgiString("password"));

char query[256];
sqlSafef(query, sizeof(query), "select * from edwScriptRegistry where name='%s'", user);
struct edwScriptRegistry *reg = edwScriptRegistryLoadByQuery(conn, query);
if (reg == NULL)
    accessDenied();
char key[EDW_SID_SIZE];
edwMakeSid(password, key);
if (!sameString(reg->secretHash, key))
    accessDenied();
sqlDisconnect(&conn);
return reg;
}
Ejemplo n.º 5
0
void edwToEap1(char *dir)
/* edwToEap1 - Help transforme edw format analysis tables to eap formatted ones.. */
{
makeDirsOnPath(dir);
struct sqlConnection *conn = edwConnect();

struct edwAnalysisJob *jobList = edwAnalysisJobLoadByQuery(conn, "select * from edwAnalysisJob order by id");
char jobFile[PATH_LEN];
safef(jobFile, PATH_LEN, "%s/%s", dir, "eapJob.tab");
transformJobTable(conn, jobList, jobFile);

struct edwAnalysisSoftware *swList = edwAnalysisSoftwareLoadByQuery(conn, "select * from edwAnalysisSoftware order by id");
struct hash *swHash = hashSwList(swList);
char softwareFile[PATH_LEN], swVersionFile[PATH_LEN];
safef(softwareFile, PATH_LEN, "%s/%s", dir, "eapSoftware.tab");
safef(swVersionFile, PATH_LEN, "%s/%s", dir, "eapSwVersion.tab");
transformSoftwareTable(conn, swList, softwareFile, swVersionFile);

struct edwAnalysisStep *stepList = edwAnalysisStepLoadByQuery(conn, "select * from edwAnalysisStep order by id");
struct hash *stepHash = hashStepList(stepList);
verbose(1, "stepHash has %d els\n", stepHash->elCount);
char stepFile[PATH_LEN], stepVersionFile[PATH_LEN], stepSoftwareFile[PATH_LEN];
safef(stepFile, PATH_LEN, "%s/%s", dir, "eapStep.tab");
safef(stepVersionFile,  PATH_LEN, "%s/%s", dir, "eapStepVersion.tab");
safef(stepSoftwareFile, PATH_LEN, "%s/%s", dir, "eapStepSoftware.tab");
transformStepTable(conn, stepList, swHash, stepFile, stepVersionFile, stepSoftwareFile);

char stepVersionSwVersionFile[PATH_LEN];
safef(stepVersionSwVersionFile, PATH_LEN, "%s/%s", dir, "eapStepSwVersion.tab");
versionVsVersion(conn, stepList, stepHash, swList, swHash, stepVersionSwVersionFile);

struct edwAnalysisRun *runList = edwAnalysisRunLoadByQuery(conn, "select * from edwAnalysisRun order by id");
char analysisFile[PATH_LEN], inputFile[PATH_LEN], outputFile[PATH_LEN];
safef(analysisFile, PATH_LEN, "%s/%s", dir, "eapAnalysis.tab");
safef(inputFile, PATH_LEN, "%s/%s", dir, "eapInput.tab");
safef(outputFile, PATH_LEN, "%s/%s", dir, "eapOutput.tab");
transformRun(conn, runList, stepHash, analysisFile, inputFile, outputFile);

if (optionExists("load"))
     {
     loadEapDb(dir);
     }
}
Ejemplo n.º 6
0
void waitForValidationAndSendEmail(struct edwSubmit *submit, char *email)
/* Poll database every 5 minutes or so to see if validation of submit has finished.
 * As soon as it has, calls doValidatedEmail with TRUE and returns.  If it still has
 * not finished after 24 hours of polling, calls doValidatedEmail with FALSE.
 * The email parameter is not used by this function. */
{
int maxSeconds = 3600*24;
int secondsPer = 60*5;
int seconds;
for (seconds = 0; seconds < maxSeconds; seconds += secondsPer)
    {
    /* A fresh connection for each poll, so none is held open across the sleep. */
    struct sqlConnection *conn = edwConnect();
    if (edwSubmitIsValidated(submit, conn))
        {
        /* Release our connection before returning - the previous version returned
         * from here with it still open. */
        sqlDisconnect(&conn);
        doValidatedEmail(submit, TRUE);
        return;
        }
    verbose(2, "waiting for validation\n");
    sqlDisconnect(&conn);
    sleep(secondsPer);	// Wait one polling interval before checking again
    }
doValidatedEmail(submit, FALSE);
}
Ejemplo n.º 7
0
void edwScriptSubmitStatus()
/* edwScriptSubmitStatus - Programatically check status of submission.. */
{
/* Pause a second - prevent inadvertent harsh denial of service from scripts. */
sleep(2);

edwScriptRegistryFromCgi();

/* Get submission from url. */
struct sqlConnection *conn = edwConnect();
char query[512];
char *url = cgiString("url");
struct edwSubmit *sub = edwMostRecentSubmission(conn, url);
char *status = NULL;
if (sub == NULL)
    {
    int posInQueue = edwSubmitPositionInQueue(conn, url, NULL);
    if (posInQueue == -1)
         errAbort("%s has not been submitted", url);
    else
         status = "pending";
    }
else
    {
    time_t endUploadTime = sub->endUploadTime;
    if (!isEmpty(sub->errorMessage))
        {
	status = "error";
	}
    else if (endUploadTime == 0)  
	{
	status = "uploading";
	}
    else
        {
	safef(query, sizeof(query), 
	    "select count(*) from edwFile where submitId=%u and errorMessage != ''",
	    sub->id);
	int errCount = sqlQuickNum(conn, query);
	int newValid = edwSubmitCountNewValid(sub, conn);
	if (newValid + errCount < sub->newFiles)
	    status = "validating";
	else if (errCount > 0)
	    status = "error";
	else
	    status = "success";
	}
    }

/* Construct JSON result */
struct dyString *dy = dyStringNew(0);
dyStringPrintf(dy, "{\n");
dyStringPrintf(dy, "    \"status\": \"%s\"", status);
if (sameString(status, "error"))
    {
    dyStringPrintf(dy, ",\n");
    dyStringPrintf(dy, "    \"errors\": [\n");
    int errCount = 0;
    if (!isEmpty(sub->errorMessage))
        {
	addErrFile(dy, errCount, sub->url, sub->errorMessage);
	++errCount;
	}
    safef(query, sizeof(query), "select * from edwFile where submitId=%u and errorMessage != ''",
	sub->id);
    struct edwFile *file, *fileList = edwFileLoadByQuery(conn, query);
    for (file = fileList; file != NULL; file = file->next)
        {
	addErrFile(dy, errCount, file->submitFileName, file->errorMessage);
	++errCount;
	}
    dyStringPrintf(dy, "\n    ]\n");
    dyStringPrintf(dy, "}\n");
    }
else
    {
    dyStringPrintf(dy, "\n}\n");
    }

/* Write out HTTP response */
printf("Content-Length: %d\r\n", dy->stringSize);
puts("Content-Type: application/json; charset=UTF-8\r");
puts("\r");
printf("%s", dy->string);
}
Ejemplo n.º 8
0
void edwFixReplaced(char *database, char *inTab, char *spikedTab, char *outSql, char *outRa)
/* edwFixReplaced - Clean up files that were replaced in ENCODE2.
 *   database - first path component of submitFileName to look under in edwFile
 *   inTab - tab separated input, two columns: old file name and objStatus
 *   spikedTab - two column file mapping an old file name to the name to use instead
 *   outSql - output file that gets "update edwFile ..." statements
 *   outRa - output file that gets an objStatus/oldFile/newFile stanza per deprecated file
 * Nothing is changed in the database here; the updates are only written to outSql. */
{
struct sqlConnection *conn = edwConnect();
struct lineFile *lf = lineFileOpen(inTab, TRUE);
FILE *fSql = mustOpen(outSql, "w");
FILE *fRa = mustOpen(outRa, "w");
char *row[2];
struct hash *renameHash = rootRenameHash();
struct hash *spikedHash = hashTwoColumnFile(spikedTab);
int depCount = 0, repCount = 0;	/* Number of deprecated= and replacedBy= updates written */
while (lineFileRowTab(lf, row))
    {
    /* Get fields in local variables. */
    char *oldFileName = row[0];
    char *objStatus = row[1];

    /* Do spikein rename lookup. */
    char *spiked = hashFindVal(spikedHash, oldFileName);
    if (spiked != NULL)
	{
	verbose(2, "renaming spikeing %s to %s\n", oldFileName, spiked);
        oldFileName = spiked;
	}

    /* Get rid of bai name for bam,bai pairs. */
    char *comma = strchr(oldFileName, ',');
    if (comma != NULL)
        {
	if (!endsWith(comma, ".bai"))
	    errAbort("Unexpected conjoining of files line %d of %s", lf->lineIx, lf->fileName);
	/* Truncate in place at the comma, leaving just the first name. */
	*comma = 0;
	}

    /* For .fastq.tgz files we got to unpack them. */
    if (endsWith(oldFileName, ".fastq.tgz"))
	{
	/* Get root name - name minus suffix */
	char *oldRoot = cloneString(oldFileName);
	/* Chop twice, once for each part of the .fastq.tgz suffix */
	chopSuffix(oldRoot);
	chopSuffix(oldRoot);
	verbose(2, "Processing fastq.tgz %s %s\n", oldFileName, oldRoot);

	// Find records for old version.
	char query[512];
	sqlSafef(query, sizeof(query), 
	    "select * from edwFile where submitFileName like '%s/%%/%s.fastq.tgz.dir/%%'"
	    " order by submitFileName",
	    database, oldRoot);
	struct edwFile *oldList = edwFileLoadByQuery(conn, query);
	int oldCount = slCount(oldList);
	if (oldCount == 0)
	    errAbort("No records match %s", query);


	// Find record for replaced version.
	// Fortunately all of the fastq.tgz's are just V2, which simplifies code a bit
	sqlSafef(query, sizeof(query), 
	    "select * from edwFile where submitFileName like '%s/%%/%sV2.fastq.tgz.dir/%%'"
	    " order by submitFileName",
	    database, oldRoot);
	struct edwFile *newList = edwFileLoadByQuery(conn, query);
	int newCount = slCount(newList);
	if (newCount == 0)
	    errAbort("No records match %s", query);

	// Make a hash of new records keyed by new file name inside of tgz
	struct edwFile *newEf;
	struct hash *newHash = hashNew(0);
	for (newEf = newList; newEf != NULL; newEf = newEf->next)
	    {
	    char fileName[FILENAME_LEN];
	    splitPath(newEf->submitFileName, NULL, fileName, NULL);
	    hashAdd(newHash, fileName, newEf);
	    verbose(2, " %s\n", fileName);
	    }
	verbose(2, "%d in oldList, %d in newList\n", oldCount, newCount);

	// Loop through old records trying to find corresponding new record
	struct edwFile *oldEf;
	for (oldEf = oldList; oldEf != NULL; oldEf = oldEf->next)
	    {
	    char fileName[FILENAME_LEN];
	    splitPath(oldEf->submitFileName, NULL, fileName, NULL);
	    /* Note this newEf shadows the one declared for the hash building loop above. */
	    struct edwFile *newEf = hashFindVal(newHash, fileName);
	    char *newName = "n/a";
	    /* Every old record gets deprecated, whether or not a replacement is found. */
	    fprintf(fSql, "update edwFile set deprecated='%s' where id=%u;\n", objStatus, oldEf->id);
	    ++depCount;
	    if (newEf != NULL)
	        {
		fprintf(fSql, "update edwFile set replacedBy=%u where id=%u;\n", newEf->id, oldEf->id);
		newName = newEf->submitFileName;
		++repCount;
		}
	    fprintf(fRa, "objStatus %s\n", objStatus);
	    fprintf(fRa, "oldFile %s\n", oldEf->submitFileName);
	    fprintf(fRa, "newFile %s\n", newName);
	    fprintf(fRa, "\n");
	    verbose(2, "%s -> %s\n", oldEf->submitFileName, newName);
	    }
	}
    else
	{

	/* Figure out new file name by either adding V2 at end, or if there is already a V#,
	 * replacing it. */
#ifdef SOON
#endif /* SOON */
	int oldVersion = 1;	/* Files with no V# in the name count as version 1 */
	char *noVersion = NULL;	/* Root of file name with any V# removed */
	    {
	    /* Split old file name into root and suffix. */
	    char *suffix = edwFindDoubleFileSuffix(oldFileName);
	    if (suffix == NULL)
		errAbort("No suffix in %s line %d of %s", oldFileName, lf->lineIx, lf->fileName);
	    char *oldRoot = cloneStringZ(oldFileName, suffix - oldFileName);
	    /* An entry in renameHash overrides the root taken from the file name. */
	    char *renamed = hashFindVal(renameHash, oldRoot);
	    if (renamed != NULL)
		{
		verbose(2, "Overriding %s with %s\n", oldRoot, renamed);
		oldRoot = cloneString(renamed);
		}


	    /* Look for V# at end of old root, and if it's there chop it off and update oldVersion */
	    noVersion = oldRoot;  /* If there is no V we are done. */
	    char *vPos = strrchr(oldRoot, 'V');
	    if (vPos != NULL)
		{
		char *numPos = vPos + 1;
		int numSize = strlen(numPos);
		/* Only treat it as a version if one or two characters follow the last V. */
		if (numSize == 1 || numSize == 2)
		    {
		    if (isAllDigits(numPos))
			{
			oldVersion = atoi(numPos);
			/* noVersion points at the same string, so this trims it too. */
			*vPos = 0;
			}
		    else
			errAbort("Expecting numbers after V in file name got %s line %d of %s",
			    numPos, lf->lineIx, lf->fileName);
		    }
		}
	    verbose(2, "%s parses to  %s %d %s\n", oldFileName, noVersion, oldVersion, suffix);

	    /* Find record for old file. */
	    char query[512];
	    sqlSafef(query, sizeof(query), 
		"select * from edwFile where submitFileName like '%s/%%/%s'", 
		database, oldFileName);
	    struct edwFile *oldEf = edwFileLoadByQuery(conn, query);
	    if (slCount(oldEf) != 1)
		errAbort("Expecting one result got %d for %s\n", slCount(oldEf), query);
	    fprintf(fSql, "# %s %s\n", oldFileName, objStatus);
	    verbose(2, "%s: %s\n", oldFileName, objStatus);

	    /* Find record for new file.  Try each version after the old one, up to V6,
	     * and take the first that has a record. */
	    struct edwFile *newEf = NULL;
	    int newVersion;
	    for (newVersion = oldVersion+1; newVersion < 7; ++newVersion)
		{
		sqlSafef(query, sizeof(query), 
		    "select * from edwFile where submitFileName like '%s/%%/%sV%d%s'",
		    database, noVersion, newVersion, suffix); 
		newEf = edwFileLoadByQuery(conn, query);
		if (newEf != NULL)
		    break;
		}
	    /* Not finding a newer version is not fatal - the old file still gets
	     * deprecated below, just with no replacedBy. */
	    if (newEf == NULL)
		verbose(2, "Could not find next version of %s (%s)", oldFileName, oldRoot);
	    if (slCount(newEf) > 1)
		errAbort("Expecting one result got %d for %s\n", slCount(newEf), query);

	    long long oldId = oldEf->id;
	    fprintf(fSql, "update edwFile set deprecated='%s' where id=%lld;\n", objStatus, oldId);
	    ++depCount;
	    char *newName = "n/a";
	    if (newEf != NULL)
		{
		long long newId = newEf->id;
		fprintf(fSql, "update edwFile set replacedBy=%lld where id=%lld;\n", newId, oldId);
		newName = newEf->submitFileName;
		++repCount;
		}
	    fprintf(fRa, "objStatus %s\n", objStatus);
	    fprintf(fRa, "oldFile %s\n", oldEf->submitFileName);
	    fprintf(fRa, "newFile %s\n", newName);
	    fprintf(fRa, "\n");
	    verbose(2, "%s -> %s\n", oldEf->submitFileName, newName);
	    }
	}
    }
verbose(1, "%d deprecated, %d replaced\n", depCount, repCount);
carefulClose(&fSql);
carefulClose(&fRa);
}