void edwCorrectFileTags(char *tabFileName) /* edwCorrectFileTags - Use this to correct tags in the edwFile table and corresponding fields * in the edwValidFile table without forcing a validateManifest rerun or a reupload.. */ { struct sqlConnection *conn = edwConnectReadWrite(); char *requiredFields[] = {"accession",}; char *forbiddenFields[] = {"md5_sum", "size", "valid_key", "file_name"}; struct fieldedTable *table = fieldedTableFromTabFile(tabFileName, tabFileName, requiredFields, ArraySize(requiredFields)); checkForbiddenFields(table, forbiddenFields, ArraySize(forbiddenFields)); int accessionIx = stringArrayIx("accession", table->fields, table->fieldCount); struct fieldedRow *fr; for (fr = table->rowList; fr != NULL; fr = fr->next) { char *acc = fr->row[accessionIx]; long long id = edwNeedFileIdForLicensePlate(conn, acc); struct edwFile *ef = edwFileFromId(conn, id); int i; char *tags = ef->tags; for (i=0; i<table->fieldCount; ++i) { if (i != accessionIx) tags = cgiStringNewValForVar(tags, table->fields[i], fr->row[i]); } edwFileResetTags(conn, ef, tags); edwFileFree(&ef); } }
void edwMakeContaminationQa(int startId, int endId)
/* edwMakeContaminationQa - Screen for contaminants by aligning against contaminant genomes.
 * Loads the files that edwFileLoadIdRange returns for startId..endId and runs
 * doContaminationQa on each of them in list order. */
{
    struct sqlConnection *conn = edwConnectReadWrite();

    /* Make list with all files in ID range */
    struct edwFile *fileList = edwFileLoadIdRange(conn, startId, endId);

    struct edwFile *file = fileList;
    while (file != NULL)
        {
        doContaminationQa(conn, file);
        file = file->next;
        }
}
void edwAddQaContamTarget(char *assemblyName)
/* edwAddQaContamTarget - Add a new contamination target to warehouse.
 * Looks up assemblyName in edwAssembly, aborts if no id comes back, and otherwise inserts
 * the assembly id into edwQaContamTarget and reports the addition on stdout. */
{
    char sql[256 + PATH_LEN];
    struct sqlConnection *conn = edwConnectReadWrite();

    /* Resolve the assembly name to its id; an id of 0 is treated as "not found". */
    sqlSafef(sql, sizeof(sql), "select id from edwAssembly where name='%s'", assemblyName);
    int targetAssemblyId = sqlQuickNum(conn, sql);
    if (!targetAssemblyId)
        {
        errAbort("Assembly %s doesn't exist in warehouse. Typo or time for edwAddAssembly?",
            assemblyName);
        }

    /* Record the assembly as a contamination target. */
    sqlSafef(sql, sizeof(sql), "insert edwQaContamTarget(assemblyId) values(%d)",
        targetAssemblyId);
    sqlUpdate(conn, sql);

    printf("Added target %s\n", assemblyName);
}
void edwFixGtfBigBed(char *how)
/* edwFixGtfBigBed - In original import the .gtf.bigBed files were bad about half the time.
 * Cricket caught this because a bunch of them ended up with the same md5 sum.  This program
 * regenerates them all.
 *
 * The doReal flag is set when how is exactly "real".  Every edwFile row whose
 * submitFileName ends in .gtf.bigBed is then handed to redoOne. */
{
    doReal = sameString(how, "real");

    struct sqlConnection *conn = edwConnectReadWrite();
    struct edwFile *brokenList = edwFileLoadByQuery(conn,
        "select * from edwFile where submitFileName like '%.gtf.bigBed'");

    struct edwFile *broken = brokenList;
    while (broken != NULL)
        {
        redoOne(conn, broken);
        broken = broken->next;
        }
}
void doMiddle()
/* doMiddle - put up middle part of web page, not including http and html headers/footers.
 * Wraps the page body in a GET form aimed at edwWebDeprecate and then, depending on state:
 *   - shows the log-in prompt when no verified email is available,
 *   - tries the deprecation when both "fileList" and "reason" CGI variables exist,
 *   - otherwise asks for the file list and reason. */
{
    printf("<FORM ACTION=\"../cgi-bin/edwWebDeprecate\" METHOD=GET>\n");

    /* Called with no arguments, like the other edwConnectReadWrite callers.  Passing a
     * surplus argument to a function defined without parameters is undefined behavior. */
    struct sqlConnection *conn = edwConnectReadWrite();

    userEmail = edwGetEmailAndVerify();
    if (userEmail == NULL)
        logIn();
    else if (cgiVarExists("fileList") && cgiVarExists("reason"))
        tryToDeprecate(conn);
    else
        getFileListAndReason(conn);

    printf("</FORM>");
}
void edwRunOnIds(char *program, char *queryString)
/* edwRunOnIds - Run an edw command line program (one that takes startId endId as its two
 * parameters) for a range of ids, putting it on edwJob queue.
 *
 * queryString is run through sqlQuickList to produce the ids.  For each id one row is
 * inserted into runTable whose commandLine is "<program> <id> <id>", i.e. the id is used
 * as both the start and the end of the range. */
{
    struct sqlConnection *conn = edwConnectReadWrite();
    struct slName *idList = sqlQuickList(conn, queryString);

    struct slName *idEl = idList;
    while (idEl != NULL)
        {
        char *idText = idEl->name;
        char insert[512];
        sqlSafef(insert, sizeof(insert), "insert into %s (commandLine) values ('%s %s %s')",
            runTable, program, idText, idText);
        sqlUpdate(conn, insert);
        idEl = idEl->next;
        }
}
void edwMakeRepeatQa(int startFileId, int endFileId) /* edwMakeRepeatQa - Figure out what proportion of things align to repeats.. */ { struct sqlConnection *conn = edwConnectReadWrite(); struct edwFile *ef, *efList = edwFileAllIntactBetween(conn, startFileId, endFileId); for (ef = efList; ef != NULL; ef = ef->next) { struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id); if (vf != NULL) { if (sameString(vf->format, "fastq")) fastqRepeatQa(conn, ef, vf); } } sqlDisconnect(&conn); }
void edwCreateNewUser(char *email)
/* Create new user, checking that user does not already exist.
 * Aborts with "User ... already exists" when edwUser already has a row with this email;
 * otherwise inserts a new edwUser row carrying only the email. */
{
    struct sqlConnection *conn = edwConnectReadWrite();
    struct dyString *sql = dyStringNew(0);

    /* Now make sure user is not already in user table. */
    sqlDyStringPrintf(sql, "select count(*) from edwUser where email = '%s'", email);
    int existing = sqlQuickNum(conn, sql->string);
    if (existing > 0)
        errAbort("User %s already exists", email);

    /* Do database insert, reusing the same string buffer. */
    dyStringClear(sql);
    sqlDyStringPrintf(sql, "insert into edwUser (email) values('%s')", email);
    sqlUpdate(conn, sql->string);

    sqlDisconnect(&conn);
}
void edwFixTargetSeq(char *when) /* edwFixTargetSeq - Fill in new fields about target seq to edwBamFile and edwAssembly.. */ { struct sqlConnection *conn = edwConnectReadWrite(); struct edwAssembly *as, *asList = edwAssemblyLoadByQuery(conn, "select * from edwAssembly"); char query[512]; for (as = asList; as != NULL; as = as->next) { char *twoBitFileName = edwPathForFileId(conn, as->twoBitId); struct twoBitFile *tbf = twoBitOpen(twoBitFileName); safef(query, sizeof(query), "update edwAssembly set seqCount=%u where id=%u", tbf->seqCount, as->id); sqlUpdate(conn, query); freez(&twoBitFileName); twoBitClose(&tbf); } edwAssemblyFreeList(&asList); struct edwBamFile *bam, *bamList = edwBamFileLoadByQuery(conn, "select * from edwBamFile"); for (bam = bamList; bam != NULL; bam = bam->next) { char *fileName = edwPathForFileId(conn, bam->fileId); samfile_t *sf = samopen(fileName, "rb", NULL); if (sf == NULL) errnoAbort("Couldn't open %s.\n", fileName); bam_header_t *head = sf->header; if (head == NULL) errAbort("Aborting ... Bad BAM header in file: %s", fileName); /* Sum up some target sizes. */ long long targetBaseCount = 0; /* Total size of all bases in target seq */ int i; for (i=0; i<head->n_targets; ++i) targetBaseCount += head->target_len[i]; safef(query, sizeof(query), "update edwBamFile set targetBaseCount=%lld,targetSeqCount=%u where id=%u", targetBaseCount, (unsigned)head->n_targets, bam->id); sqlUpdate(conn, query); samclose(sf); freez(&fileName); } }
void doMiddle()
/* doMiddle - put up middle part of web page, not including http and html headers/footers.
 * Installs localWarn as the warning handler, wraps the page body in a GET form aimed at
 * edwWebSubmit and then dispatches on state:
 *   - no verified email         -> log-in prompt,
 *   - stop button variable set  -> stop the upload,
 *   - "submitUrl" variable set  -> submit the URL,
 *   - "monitor" variable set    -> monitor the submission,
 *   - otherwise                 -> ask for the URL. */
{
    pushWarnHandler(localWarn);
    printf("<FORM ACTION=\"../cgi-bin/edwWebSubmit\" METHOD=GET>\n");

    /* Called with no arguments, like the other edwConnectReadWrite callers.  Passing a
     * surplus argument to a function defined without parameters is undefined behavior. */
    struct sqlConnection *conn = edwConnectReadWrite();

    userEmail = edwGetEmailAndVerify();
    if (userEmail == NULL)
        logIn();
    else if (cgiVarExists(stopButtonName))
        stopUpload(conn);
    else if (cgiVarExists("submitUrl"))
        submitUrl(conn);
    else if (cgiVarExists("monitor"))
        monitorSubmission(conn);
    else
        getUrl(conn);

    printf("</FORM>");
}
void loadEapDb(char *dir)
/* Load up EAP portion of database from tab file directory.
 * For each EAP table name, the file <dir>/<table>.tab is loaded into the table of the
 * same name with a "load data local infile" statement.  Tables are processed in the
 * order they are listed below. */
{
    char *eapTables[] = {"eapJob", "eapSoftware", "eapSwVersion", "eapStep",
        "eapStepSoftware", "eapStepVersion", "eapStepSwVersion", "eapAnalysis",
        "eapInput", "eapOutput",};
    struct sqlConnection *conn = edwConnectReadWrite();

    int tableIx;
    for (tableIx = 0; tableIx < ArraySize(eapTables); ++tableIx)
        {
        char *tableName = eapTables[tableIx];

        /* Make up tab separated file name. */
        char tabPath[PATH_LEN];
        safef(tabPath, sizeof(tabPath), "%s/%s.tab", dir, tableName);

        /* Ask database to load the file into the table. */
        char sql[2*PATH_MAX];
        sqlSafef(sql, sizeof(sql), "load data local infile '%s' into table %s",
            tabPath, tableName);
        verbose(2, "%s\n", sql);
        sqlUpdate(conn, sql);
        }
}
void edwChangeFormat(char *format, int idCount, char *idStrings[])
/* edwChangeFormat - Change format and force a revalidation for a file.
 *   format    - new format, passed through to changeFormat for every file.
 *   idCount   - number of entries in idStrings; nothing is done when it is not positive.
 *   idStrings - file ids as ASCII numbers.
 * All ids are resolved to edwValidFile records before any change is made, so a bad id
 * aborts the run before the first file is touched. */
{
    /* A variable-length array must have a size greater than zero, so bail out before
     * declaring one for an empty (or nonsensical) id list. */
    if (idCount <= 0)
        return;

    struct sqlConnection *conn = edwConnectReadWrite();

    /* Convert ascii id's to valid file records so we catch errors early. */
    struct edwValidFile *vfs[idCount];
    int i;
    for (i = 0; i < idCount; ++i)
        {
        long long id = sqlLongLong(idStrings[i]);
        struct edwValidFile *vf = vfs[i] = edwValidFileFromFileId(conn, id);
        if (vf == NULL)
            errAbort("%lld is not a fileId in the edwValidFile table", id);
        }

    /* Loop through each file and change format. */
    for (i = 0; i < idCount; ++i)
        changeFormat(conn, vfs[i], format);

    sqlDisconnect(&conn);
}
void doMiddle() /* Write what goes between BODY and /BODY */ { pushWarnHandler(localWarn); if (!cgiServerHttpsIsOn()) usage(); struct sqlConnection *conn = edwConnectReadWrite(); printf("<FORM ACTION=\"edwWebRegisterScript\" METHOD=POST>\n"); printf("<B>Register Script with ENCODE Data Warehouse</B><BR>\n"); #ifdef SOON uglyf("HTTP_AUTHENTICATION: '%s'<BR>\n", getenv("HTTP_AUTHENTICATION")); uglyf("HTTP_AUTHORIZATION: '%s'<BR>\n", getenv("HTTP_AUTHORIZATION")); dumpEnv(mainEnv); #endif if (userEmail == NULL) { printf("Please sign in:"); printf("<INPUT TYPE=BUTTON NAME=\"signIn\" VALUE=\"sign in\" id=\"signin\">"); } else if (cgiVarExists("description")) { struct edwUser *user = edwUserFromEmail(conn, userEmail); if (user == NULL) edwWarnUnregisteredUser(userEmail); else { char password[HEXED_32_SIZE]; edwRandomHexed32(password); char babyName[HEXED_32_SIZE]; edwRandomBabble(babyName, sizeof(babyName)); edwRegisterScript(conn, user, babyName, password, cgiString("description")); printf("Script now registered.<BR>\n"); printf("The script user name is %s.<BR>\n", babyName); printf("The script password is %s.<BR>\n", password); printf("Please save the script user name and password somewhere. "); puts("Please pass these two and the URL"); puts(" of your validated manifest file (validated.txt) to our server to submit data."); puts("Construct a URL of the form:<BR>"); printf("<PRE>https://encodedcc.sdsc.edu/cgi-bin/edwScriptSubmit" "?user=%s&password=%s&url=%s\n</PRE>", babyName, password, cgiEncode("http://your.host.edu/your_dir/validated.txt")); puts("That is pass the CGI encoded variables user, password, and url to the "); puts("web services CGI at"); puts("https://encodedcc.sdsc.edu/cgi-bin/edwScriptSubmit. "); puts("You can use the http://encodedcc.sdsc.edu/cgi-bin/edwWebBrowse site to "); puts("monitor your submission interactively. 
Please contact your wrangler if you "); puts("have any questions.<BR>"); cgiMakeButton("submit", "Register another script"); } printf(" "); edwPrintLogOutButton(); } else { struct edwUser *user = edwUserFromEmail(conn, userEmail); edwPrintLogOutButton(); if (user == NULL) edwWarnUnregisteredUser(userEmail); else { printf("%s is authorized to register a new script<BR>\n", userEmail); printf("<BR>Script description:\n"); cgiMakeTextVar("description", NULL, 80); cgiMakeSubmitButton(); } } printf("</FORM>\n"); }
void edwSubmit(char *submitUrl, char *email)
/* edwSubmit - Submit URL with validated.txt to warehouse.
 *   submitUrl - URL of the submission file.  It must contain a '/'; it is split at the
 *               last one into submitDir (everything up to and including that slash)
 *               and submitFile (the remainder).
 *   email     - used to look up the submitting user, and passed on to
 *               waitForValidationAndSendEmail at the end.
 *
 * Outline of what the body does:
 *   1. Split the URL, connect, look up the user, check for an overlapping run and
 *      create an empty submit record.
 *   2. Inside an errCatch block: check free disk space, fetch the submission file to a
 *      temp file, reuse or create its edwFile record, parse it, split the parsed rows
 *      into oldList/newList and handle tags of the old files.  An error caught here is
 *      passed to handleSubmitError.
 *   3. Outside the errCatch block: walk newList fetching files that are neither present
 *      nor being fetched, recording replacements, then stamp endUploadTime.
 *   4. Disconnect and wait for validation so the user can be mailed. */
{
    /* Parse out url a little into submitDir and submitFile */
    char *lastSlash = strrchr(submitUrl, '/');
    if (lastSlash == NULL)
        errAbort("%s is not a valid URL - it has no '/' in it.", submitUrl);
    char *submitFile = lastSlash+1;
    int submitDirSize = submitFile - submitUrl;
    char submitDir[submitDirSize+1];
    memcpy(submitDir, submitUrl, submitDirSize);
    submitDir[submitDirSize] = 0;  // Add trailing zero

    /* Make sure user has access. */
    struct sqlConnection *conn = edwConnectReadWrite();
    struct edwUser *user = edwMustGetUserFromEmail(conn, email);
    int userId = user->id;

    /* See if we are already running on same submission.  If so counsel patience and quit. */
    notOverlappingSelf(conn, submitUrl);

    /* Make a submit record. */
    int submitId = makeNewEmptySubmitRecord(conn, submitUrl, userId);

    /* The next errCatch block will fill these in if all goes well. */
    struct submitFileRow *sfrList = NULL, *oldList = NULL, *newList = NULL;
    int oldCount = 0;
    long long oldBytes = 0, newBytes = 0, byteCount = 0;

    /* Start catching errors from here and writing them in submitId.  If we don't
     * throw we'll end up having a list of all files in the submit in sfrList. */
    struct errCatch *errCatch = errCatchNew();
    char query[1024];   /* Scratch buffer reused for every SQL statement below. */
    if (errCatchStart(errCatch))
        {
        /* Make sure they got a bit of space, enough for a reasonable submit file.
         * We do this here just because we can make error message more informative. */
        long long diskFreeSpace = freeSpaceOnFileSystem(edwRootDir);
        if (diskFreeSpace < 4*1024*1024)
            errAbort("No space left in warehouse!");

        /* Open remote submission file.  This is most likely where we will fail. */
        int hostId=0, submitDirId = 0;
        long long startUploadTime = edwNow();
        int remoteFd = edwOpenAndRecordInDir(conn, submitDir, submitFile, submitUrl,
            &hostId, &submitDirId);

        /* Copy to local temp file. */
        char tempSubmitFile[PATH_LEN];
        fetchFdToTempFile(remoteFd, tempSubmitFile);
        mustCloseFd(&remoteFd);
        long long endUploadTime = edwNow();

        /* Calculate MD5 sum, and see if we already have such a file. */
        char *md5 = md5HexForFile(tempSubmitFile);
        int fileId = findFileGivenMd5AndSubmitDir(conn, md5, submitDirId);

        /* If we already have it, then delete temp file, otherwise put file in file table.
         * Either way submitLocalPath ends up naming the submission file inside the
         * warehouse. */
        char submitLocalPath[PATH_LEN];
        if (fileId != 0)
            {
            remove(tempSubmitFile);
            char submitRelativePath[PATH_LEN];
            sqlSafef(query, sizeof(query), "select edwFileName from edwFile where id=%d", fileId);
            sqlNeedQuickQuery(conn, query, submitRelativePath, sizeof(submitRelativePath));
            safef(submitLocalPath, sizeof(submitLocalPath), "%s%s", edwRootDir, submitRelativePath);
            }
        else
            {
            /* Looks like it's the first time we've seen this submission file, so
             * save the file itself.  We'll get to the records inside the file in a bit. */
            fileId = makeNewEmptyFileRecord(conn, submitId, submitDirId, submitFile, 0);

            /* Get file/path names for submission file inside warehouse. */
            char edwFile[PATH_LEN];
            edwMakeFileNameAndPath(fileId, submitFile, edwFile, submitLocalPath);

            /* Move file to final resting place and get update time and size from local
             * file system. */
            mustRename(tempSubmitFile, submitLocalPath);
            time_t updateTime = fileModTime(submitLocalPath);
            off_t size = fileSize(submitLocalPath);

            /* Update file table which now should be complete including updateTime. */
            sqlSafef(query, sizeof(query), "update edwFile set "
                " updateTime=%lld, size=%lld, md5='%s', edwFileName='%s',"
                " startUploadTime=%lld, endUploadTime=%lld"
                " where id=%u\n",
                (long long)updateTime, (long long)size, md5, edwFile,
                startUploadTime, endUploadTime, fileId);
            sqlUpdate(conn, query);
            }

        /* By now there is a submit file on the local file system.  We parse it out. */
        edwParseSubmitFile(conn, submitLocalPath, submitUrl, &sfrList);

        /* Save our progress so far to submit table. */
        sqlSafef(query, sizeof(query), "update edwSubmit"
            " set submitFileId=%lld, submitDirId=%lld, fileCount=%d where id=%d",
            (long long)fileId, (long long)submitDirId, slCount(sfrList), submitId);
        sqlUpdate(conn, query);

        /* Weed out files we already have.  Each row is moved from sfrList onto either
         * oldList or newList, so the next pointer is saved before the row is relinked. */
        struct submitFileRow *sfr, *sfrNext;
        for (sfr = sfrList; sfr != NULL; sfr = sfrNext)
            {
            sfrNext = sfr->next;
            struct edwFile *bf = sfr->file;
            /* NOTE(review): this long long fileId shadows the int fileId of the
             * submission file declared earlier in the errCatch block. */
            long long fileId;
            if ((fileId = edwGotFile(conn, submitDir, bf->submitFileName, bf->md5, bf->size)) >= 0)
                {
                ++oldCount;
                oldBytes += bf->size;
                sfr->md5MatchFileId = fileId;
                slAddHead(&oldList, sfr);
                }
            else
                slAddHead(&newList, sfr);
            byteCount += bf->size;   /* Counts every file, old or new. */
            }
        sfrList = NULL;   /* All rows now live on oldList or newList. */
        /* slAddHead built both lists back to front; reverse them. */
        slReverse(&newList);
        slReverse(&oldList);

        /* Update database with oldFile count. */
        sqlSafef(query, sizeof(query),
            "update edwSubmit set oldFiles=%d,oldBytes=%lld,byteCount=%lld where id=%u",
            oldCount, oldBytes, byteCount, submitId);
        sqlUpdate(conn, query);

        /* Deal with old files.  This may throw an error.  We do it before downloading new
         * files since we want to fail fast if we are going to fail. */
        int updateCount = handleOldFileTags(conn, oldList, doUpdate);
        sqlSafef(query, sizeof(query),
            "update edwSubmit set metaChangeCount=%d where id=%u",
            updateCount, submitId);
        sqlUpdate(conn, query);
        }
    errCatchEnd(errCatch);
    if (errCatch->gotError)
        {
        handleSubmitError(conn, submitId, errCatch->message->string);
        /* The handleSubmitError will keep on throwing. */
        }
    errCatchFree(&errCatch);

    /* Go through list attempting to load the files if we don't already have them. */
    struct submitFileRow *sfr;
    for (sfr = newList; sfr != NULL; sfr = sfr->next)
        {
        if (edwSubmitShouldStop(conn, submitId))
            break;
        struct edwFile *bf = sfr->file;
        /* Build the URL of this individual file: submitDir followed by its file name.
         * NOTE(review): this local submitUrl shadows the function parameter of the
         * same name for the rest of the loop body. */
        int submitUrlSize = strlen(submitDir) + strlen(bf->submitFileName) + 1;
        char submitUrl[submitUrlSize];
        safef(submitUrl, submitUrlSize, "%s%s", submitDir, bf->submitFileName);
        if (edwGotFile(conn, submitDir, bf->submitFileName, bf->md5, bf->size)<0)
            {
            /* We can't get a ID for this file.  There's two possible reasons -
             * either somebody is in the middle of fetching it or nobody's started.
             * If somebody is in the middle of fetching it, assume they died
             * if they took more than an hour, and start up another fetch.
             * So here we fetch unless somebody else is fetching recently. */
            if (edwGettingFile(conn, submitDir, bf->submitFileName) < 0)
                {
                verbose(1, "Fetching %s\n", bf->submitFileName);
                getSubmittedFile(conn, bf, submitDir, submitUrl, submitId);
                newBytes += bf->size;
                sqlSafef(query, sizeof(query),
                    "update edwSubmit set newFiles=newFiles+1,newBytes=%lld where id=%d",
                    newBytes, submitId);
                sqlUpdate(conn, query);
                }
            }
        else
            {
            /* The file showed up since the lists were built; count it as old. */
            verbose(2, "Already got %s\n", bf->submitFileName);
            sqlSafef(query, sizeof(query),
                "update edwSubmit set oldFiles=oldFiles+1 where id=%d",
                submitId);
            sqlUpdate(conn, query);
            }

        if (sfr->replacesFile != 0)
            {
            /* What happens when the replacement doesn't validate? */
            verbose(2, "Replacing %s with %s\n", sfr->replaces, bf->submitFileName);
            sqlSafef(query, sizeof(query),
                "update edwFile set replacedBy=%u, deprecated='%s' where id=%u",
                bf->id, sfr->replaceReason, sfr->replacesFile);
            sqlUpdate(conn, query);
            }
        }

    /* If we made it here, update submit endUploadTime */
    sqlSafef(query, sizeof(query),
        "update edwSubmit set endUploadTime=%lld where id=%d",
        edwNow(), submitId);
    sqlUpdate(conn, query);

    /* Get a real submission record and then set things up so mail user when all done. */
    struct edwSubmit *submit = edwSubmitFromId(conn, submitId);
    sqlDisconnect(&conn);  // We'll be waiting a while so free connection
    waitForValidationAndSendEmail(submit, email);
}