void eapFixSortedBams(char *outSql) /* eapFixSortedBams - Help fix early eap run that left bams sorted by read rather than by chromosome.. */ { FILE *f = mustOpen(outSql, "w"); struct sqlConnection *conn = edwConnect(); char query[512]; sqlSafef(query, sizeof(query), "select * from edwAnalysisRun"); struct edwAnalysisRun *run, *runList = edwAnalysisRunLoadByQuery(conn, query); for (run = runList; run != NULL; run = run->next) { if (run->createStatus > 0 && run->createCount == 1) { struct edwFile *create = getFilesFromIds(conn, run->createFileIds, run->createCount); if (create->submitId == badSubmit) { char *fileName = edwPathForFileId(conn, create->id); if (!bamIsSortedByTarget(fileName, 1000)) { hookupFix(conn, run, create, f); } else errAbort("Looks like bad %s is already sorted\n", fileName); } else if (create->submitId == goodSubmit) { char *fileName = edwPathForFileId(conn, create->id); if (!bamIsSortedByTarget(fileName, 1000)) { errAbort("Looks like good %s needs sorting\n", fileName); } } } } carefulClose(&f); }
void edwFixRevoked(char *database, char *inFile) /* edwFixRevoked - Mark as deprecated files that are revoked in ENCODE2. */ /* inFile is in format: * metaVariable objStatus revoked [- reason] * metaObject name */ { struct sqlConnection *conn = edwConnect(); struct lineFile *lf = lineFileOpen(inFile, TRUE); char *line; char *defaultReason = "Revoked in ENCODE2"; char *reason = defaultReason; while (lineFileNextReal(lf, &line)) { if (startsWithWord("metaVariable", line)) { char *pattern = "metaVariable objStatus revoked"; if (startsWithWord(pattern, line)) { reason = skipLeadingSpaces(line + strlen(pattern)); if (isEmpty(reason)) reason = defaultReason; else { if (reason[0] == '-') reason = skipLeadingSpaces(reason + 1); reason = cloneString(reason); } } else errAbort("??? %s\n", line); } else if (startsWithWord("metaObject", line)) { char *row[3]; int wordCount = chopLine(line, row); if (wordCount != 2) errAbort("Strange metaobject line %d of %s\n", lf->lineIx, lf->fileName); char *prefix = row[1]; if (!startsWith("wgEncode", prefix)) errAbort("Strange object line %d of %s\n", lf->lineIx, lf->fileName); char query[512]; sqlSafef(query, sizeof(query), "select * from edwFile where submitFileName like '%s/%%/%s%%'", database, prefix); struct edwFile *ef, *efList = edwFileLoadByQuery(conn, query); printf("# %s %s\n", prefix, reason); for (ef = efList; ef != NULL; ef = ef->next) { long long id = ef->id; printf("update edwFile set deprecated='%s' where id=%lld;\n", reason, id); } } else errAbort("Unrecognized first word in %s\n", line); } }
void doValidatedEmail(struct edwSubmit *submit, boolean isComplete)
/* Send an email with info on all validated files to the user who owns submit.
 * When the submission contains only previously submitted files (and no metadata
 * changes, error, or file in transit) a short note saying validation was skipped
 * is sent instead.  Otherwise the mail lists each file of the submission, apart
 * from the one with id submit->submitFileId, with its license plate; files
 * without one are flagged "Not validating".  isComplete picks the opening
 * sentence: fully validated vs. still not validated after 24 hours. */
{
struct sqlConnection *conn = edwConnect();
struct edwUser *user = edwUserFromId(conn, submit->userId);
struct dyString *message = dyStringNew(0);

/* Does this submission have no new files at all? */
boolean nothingNew = (submit->oldFiles != 0) && (submit->newFiles == 0)
    && (submit->metaChangeCount == 0) && isEmpty(submit->errorMessage)
    && (submit->fileIdInTransit == 0);

if (nothingNew)
    {
    dyStringPrintf(message, "Your submission from %s is completed, but validation was not performed for this submission since all files in validate.txt have been previously submitted and validated.\n", submit->url);
    }
else
    {
    if (isComplete)
        dyStringPrintf(message, "Your submission from %s is completely validated\n", submit->url);
    else
        dyStringPrintf(message,
            "Your submission hasn't validated after 24 hours, something is probably wrong\n"
            "at %s\n", submit->url);
    dyStringPrintf(message, "\n#accession\tsubmitted_file_name\tnotes\n");

    /* Left join so that files lacking an edwValidFile row still show up, with
     * a NULL license plate. */
    char query[512];
    sqlSafef(query, sizeof(query),
        "select licensePlate,submitFileName "
        " from edwFile left join edwValidFile on edwFile.id = edwValidFile.fileId "
        " where edwFile.submitId = %u and edwFile.id != %u"
        , submit->id, submit->submitFileId);
    struct sqlResult *sr = sqlGetResult(conn, query);
    char **row;
    for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
        {
        char *accession = row[0];
        char *fileName = row[1];
        dyStringPrintf(message, "%s\t%s\t", naForNull(accession), fileName);
        if (accession == NULL)
            dyStringPrintf(message, "Not validating");
        dyStringPrintf(message, "\n");
        }
    sqlFreeResult(&sr);
    }

/* Both kinds of message go out the same way and share the cleanup. */
mailViaPipe(user->email, "EDW Validation Results", message->string, edwDaemonEmail);
sqlDisconnect(&conn);
dyStringFree(&message);
}
struct edwScriptRegistry *edwScriptRegistryFromCgi() /* Get script registery from cgi variables. Does authentication too. */ { struct sqlConnection *conn = edwConnect(); char *user = sqlEscapeString(cgiString("user")); char *password = sqlEscapeString(cgiString("password")); char query[256]; sqlSafef(query, sizeof(query), "select * from edwScriptRegistry where name='%s'", user); struct edwScriptRegistry *reg = edwScriptRegistryLoadByQuery(conn, query); if (reg == NULL) accessDenied(); char key[EDW_SID_SIZE]; edwMakeSid(password, key); if (!sameString(reg->secretHash, key)) accessDenied(); sqlDisconnect(&conn); return reg; }
void edwToEap1(char *dir) /* edwToEap1 - Help transforme edw format analysis tables to eap formatted ones.. */ { makeDirsOnPath(dir); struct sqlConnection *conn = edwConnect(); struct edwAnalysisJob *jobList = edwAnalysisJobLoadByQuery(conn, "select * from edwAnalysisJob order by id"); char jobFile[PATH_LEN]; safef(jobFile, PATH_LEN, "%s/%s", dir, "eapJob.tab"); transformJobTable(conn, jobList, jobFile); struct edwAnalysisSoftware *swList = edwAnalysisSoftwareLoadByQuery(conn, "select * from edwAnalysisSoftware order by id"); struct hash *swHash = hashSwList(swList); char softwareFile[PATH_LEN], swVersionFile[PATH_LEN]; safef(softwareFile, PATH_LEN, "%s/%s", dir, "eapSoftware.tab"); safef(swVersionFile, PATH_LEN, "%s/%s", dir, "eapSwVersion.tab"); transformSoftwareTable(conn, swList, softwareFile, swVersionFile); struct edwAnalysisStep *stepList = edwAnalysisStepLoadByQuery(conn, "select * from edwAnalysisStep order by id"); struct hash *stepHash = hashStepList(stepList); verbose(1, "stepHash has %d els\n", stepHash->elCount); char stepFile[PATH_LEN], stepVersionFile[PATH_LEN], stepSoftwareFile[PATH_LEN]; safef(stepFile, PATH_LEN, "%s/%s", dir, "eapStep.tab"); safef(stepVersionFile, PATH_LEN, "%s/%s", dir, "eapStepVersion.tab"); safef(stepSoftwareFile, PATH_LEN, "%s/%s", dir, "eapStepSoftware.tab"); transformStepTable(conn, stepList, swHash, stepFile, stepVersionFile, stepSoftwareFile); char stepVersionSwVersionFile[PATH_LEN]; safef(stepVersionSwVersionFile, PATH_LEN, "%s/%s", dir, "eapStepSwVersion.tab"); versionVsVersion(conn, stepList, stepHash, swList, swHash, stepVersionSwVersionFile); struct edwAnalysisRun *runList = edwAnalysisRunLoadByQuery(conn, "select * from edwAnalysisRun order by id"); char analysisFile[PATH_LEN], inputFile[PATH_LEN], outputFile[PATH_LEN]; safef(analysisFile, PATH_LEN, "%s/%s", dir, "eapAnalysis.tab"); safef(inputFile, PATH_LEN, "%s/%s", dir, "eapInput.tab"); safef(outputFile, PATH_LEN, "%s/%s", dir, "eapOutput.tab"); 
transformRun(conn, runList, stepHash, analysisFile, inputFile, outputFile); if (optionExists("load")) { loadEapDb(dir); } }
void waitForValidationAndSendEmail(struct edwSubmit *submit, char *email) /* Poll database every 5 minute or so to see if finished. */ { int maxSeconds = 3600*24; int secondsPer = 60*5; int seconds; for (seconds = 0; seconds < maxSeconds; seconds += secondsPer) { struct sqlConnection *conn = edwConnect(); if (edwSubmitIsValidated(submit, conn)) { doValidatedEmail(submit, TRUE); return; } verbose(2, "waiting for validation\n"); sqlDisconnect(&conn); sleep(secondsPer); // Sleep for 5 more minutes } doValidatedEmail(submit, FALSE); }
void edwScriptSubmitStatus() /* edwScriptSubmitStatus - Programatically check status of submission.. */ { /* Pause a second - prevent inadvertent harsh denial of service from scripts. */ sleep(2); edwScriptRegistryFromCgi(); /* Get submission from url. */ struct sqlConnection *conn = edwConnect(); char query[512]; char *url = cgiString("url"); struct edwSubmit *sub = edwMostRecentSubmission(conn, url); char *status = NULL; if (sub == NULL) { int posInQueue = edwSubmitPositionInQueue(conn, url, NULL); if (posInQueue == -1) errAbort("%s has not been submitted", url); else status = "pending"; } else { time_t endUploadTime = sub->endUploadTime; if (!isEmpty(sub->errorMessage)) { status = "error"; } else if (endUploadTime == 0) { status = "uploading"; } else { safef(query, sizeof(query), "select count(*) from edwFile where submitId=%u and errorMessage != ''", sub->id); int errCount = sqlQuickNum(conn, query); int newValid = edwSubmitCountNewValid(sub, conn); if (newValid + errCount < sub->newFiles) status = "validating"; else if (errCount > 0) status = "error"; else status = "success"; } } /* Construct JSON result */ struct dyString *dy = dyStringNew(0); dyStringPrintf(dy, "{\n"); dyStringPrintf(dy, " \"status\": \"%s\"", status); if (sameString(status, "error")) { dyStringPrintf(dy, ",\n"); dyStringPrintf(dy, " \"errors\": [\n"); int errCount = 0; if (!isEmpty(sub->errorMessage)) { addErrFile(dy, errCount, sub->url, sub->errorMessage); ++errCount; } safef(query, sizeof(query), "select * from edwFile where submitId=%u and errorMessage != ''", sub->id); struct edwFile *file, *fileList = edwFileLoadByQuery(conn, query); for (file = fileList; file != NULL; file = file->next) { addErrFile(dy, errCount, file->submitFileName, file->errorMessage); ++errCount; } dyStringPrintf(dy, "\n ]\n"); dyStringPrintf(dy, "}\n"); } else { dyStringPrintf(dy, "\n}\n"); } /* Write out HTTP response */ printf("Content-Length: %d\r\n", dy->stringSize); puts("Content-Type: application/json; 
charset=UTF-8\r"); puts("\r"); printf("%s", dy->string); }
void edwFixReplaced(char *database, char *inTab, char *spikedTab, char *outSql, char *outRa) /* edwFixReplaced - Clean up files that were replaced in ENCODE2. */ { struct sqlConnection *conn = edwConnect(); struct lineFile *lf = lineFileOpen(inTab, TRUE); FILE *fSql = mustOpen(outSql, "w"); FILE *fRa = mustOpen(outRa, "w"); char *row[2]; struct hash *renameHash = rootRenameHash(); struct hash *spikedHash = hashTwoColumnFile(spikedTab); int depCount = 0, repCount = 0; while (lineFileRowTab(lf, row)) { /* Get fields in local variables. */ char *oldFileName = row[0]; char *objStatus = row[1]; /* Do spikein rename lookup. */ char *spiked = hashFindVal(spikedHash, oldFileName); if (spiked != NULL) { verbose(2, "renaming spikeing %s to %s\n", oldFileName, spiked); oldFileName = spiked; } /* Get rid of bai name for bam,bai pairs. */ char *comma = strchr(oldFileName, ','); if (comma != NULL) { if (!endsWith(comma, ".bai")) errAbort("Unexpected conjoining of files line %d of %s", lf->lineIx, lf->fileName); *comma = 0; } /* For .fastq.tgz files we got to unpack them. */ if (endsWith(oldFileName, ".fastq.tgz")) { /* Get root name - name minus suffix */ char *oldRoot = cloneString(oldFileName); chopSuffix(oldRoot); chopSuffix(oldRoot); verbose(2, "Processing fastq.tgz %s %s\n", oldFileName, oldRoot); // Find records for old version. char query[512]; sqlSafef(query, sizeof(query), "select * from edwFile where submitFileName like '%s/%%/%s.fastq.tgz.dir/%%'" " order by submitFileName", database, oldRoot); struct edwFile *oldList = edwFileLoadByQuery(conn, query); int oldCount = slCount(oldList); if (oldCount == 0) errAbort("No records match %s", query); // Find record for replaced version. 
// Fortunately all of the fastq.tgz's are just V2, which simplifies code a bit sqlSafef(query, sizeof(query), "select * from edwFile where submitFileName like '%s/%%/%sV2.fastq.tgz.dir/%%'" " order by submitFileName", database, oldRoot); struct edwFile *newList = edwFileLoadByQuery(conn, query); int newCount = slCount(newList); if (newCount == 0) errAbort("No records match %s", query); // Make a hash of new records keyed by new file name inside of tgz struct edwFile *newEf; struct hash *newHash = hashNew(0); for (newEf = newList; newEf != NULL; newEf = newEf->next) { char fileName[FILENAME_LEN]; splitPath(newEf->submitFileName, NULL, fileName, NULL); hashAdd(newHash, fileName, newEf); verbose(2, " %s\n", fileName); } verbose(2, "%d in oldList, %d in newList\n", oldCount, newCount); // Loop through old records trying to find corresponding new record struct edwFile *oldEf; for (oldEf = oldList; oldEf != NULL; oldEf = oldEf->next) { char fileName[FILENAME_LEN]; splitPath(oldEf->submitFileName, NULL, fileName, NULL); struct edwFile *newEf = hashFindVal(newHash, fileName); char *newName = "n/a"; fprintf(fSql, "update edwFile set deprecated='%s' where id=%u;\n", objStatus, oldEf->id); ++depCount; if (newEf != NULL) { fprintf(fSql, "update edwFile set replacedBy=%u where id=%u;\n", newEf->id, oldEf->id); newName = newEf->submitFileName; ++repCount; } fprintf(fRa, "objStatus %s\n", objStatus); fprintf(fRa, "oldFile %s\n", oldEf->submitFileName); fprintf(fRa, "newFile %s\n", newName); fprintf(fRa, "\n"); verbose(2, "%s -> %s\n", oldEf->submitFileName, newName); } } else { /* Figure out new file name by either adding V2 at end, or if there is already a V#, * replacing it. */ #ifdef SOON #endif /* SOON */ int oldVersion = 1; char *noVersion = NULL; { /* Split old file name into root and suffix. 
*/ char *suffix = edwFindDoubleFileSuffix(oldFileName); if (suffix == NULL) errAbort("No suffix in %s line %d of %s", oldFileName, lf->lineIx, lf->fileName); char *oldRoot = cloneStringZ(oldFileName, suffix - oldFileName); char *renamed = hashFindVal(renameHash, oldRoot); if (renamed != NULL) { verbose(2, "Overriding %s with %s\n", oldRoot, renamed); oldRoot = cloneString(renamed); } /* Look for V# at end of old root, and if it's there chop it off and update oldVersion */ noVersion = oldRoot; // If no V, we done. */ char *vPos = strrchr(oldRoot, 'V'); if (vPos != NULL) { char *numPos = vPos + 1; int numSize = strlen(numPos); if (numSize == 1 || numSize == 2) { if (isAllDigits(numPos)) { oldVersion = atoi(numPos); *vPos = 0; } else errAbort("Expecting numbers after V in file name got %s line %d of %s", numPos, lf->lineIx, lf->fileName); } } verbose(2, "%s parses to %s %d %s\n", oldFileName, noVersion, oldVersion, suffix); /* Find record for old file. */ char query[512]; sqlSafef(query, sizeof(query), "select * from edwFile where submitFileName like '%s/%%/%s'", database, oldFileName); struct edwFile *oldEf = edwFileLoadByQuery(conn, query); if (slCount(oldEf) != 1) errAbort("Expecting one result got %d for %s\n", slCount(oldEf), query); fprintf(fSql, "# %s %s\n", oldFileName, objStatus); verbose(2, "%s: %s\n", oldFileName, objStatus); /* Find record for new file. 
*/ struct edwFile *newEf = NULL; int newVersion; for (newVersion = oldVersion+1; newVersion < 7; ++newVersion) { sqlSafef(query, sizeof(query), "select * from edwFile where submitFileName like '%s/%%/%sV%d%s'", database, noVersion, newVersion, suffix); newEf = edwFileLoadByQuery(conn, query); if (newEf != NULL) break; } if (newEf == NULL) verbose(2, "Could not find next version of %s (%s)", oldFileName, oldRoot); if (slCount(newEf) > 1) errAbort("Expecting one result got %d for %s\n", slCount(newEf), query); long long oldId = oldEf->id; fprintf(fSql, "update edwFile set deprecated='%s' where id=%lld;\n", objStatus, oldId); ++depCount; char *newName = "n/a"; if (newEf != NULL) { long long newId = newEf->id; fprintf(fSql, "update edwFile set replacedBy=%lld where id=%lld;\n", newId, oldId); newName = newEf->submitFileName; ++repCount; } fprintf(fRa, "objStatus %s\n", objStatus); fprintf(fRa, "oldFile %s\n", oldEf->submitFileName); fprintf(fRa, "newFile %s\n", newName); fprintf(fRa, "\n"); verbose(2, "%s -> %s\n", oldEf->submitFileName, newName); } } } verbose(1, "%d deprecated, %d replaced\n", depCount, repCount); carefulClose(&fSql); carefulClose(&fRa); }