void openSeekRead(char *filename, bits64 offset, bits64 len, char *buf)
/* Open filename read-only, seek to position offset from the start of the file,
 * and read len bytes into buf.  The descriptor is closed again before returning.
 * Each step goes through a must* helper, so a failure dies instead of returning. */
{
int inFd = mustOpenFd(filename, O_RDONLY);
mustLseek(inFd, offset, SEEK_SET);
mustReadFd(inFd, buf, len);
mustCloseFd(&inFd);
}
void edwReserveTempFile(char *path)
/* Reserve a temp file by calling mkstemp on path.  mkstemp rewrites the terminal
 * XXXXXX of path in place and creates an empty file of that name.  The descriptor
 * is closed immediately, so all that is left is the (for now empty) file.
 * Aborts with the errno text if the file can't be created. */
{
int tempFd = mkstemp(path);
if (-1 == tempFd)
    errnoAbort("Couldn't create temp file %s", path);
mustCloseFd(&tempFd);
}
void fetchFdToTempFile(int remoteFd, char tempFileName[PATH_LEN])
/* Copy everything readable from remoteFd into a freshly created temp file.
 *   remoteFd - already-open descriptor to copy from; it is not closed here.
 *   tempFileName - output buffer of PATH_LEN chars that receives the temp file name. */
{
/* Build the mkstemp template: temp dir followed by a name ending in XXXXXX. */
safef(tempFileName, PATH_LEN, "%sedwSubmitXXXXXX", edwTempDir());

/* Create and open the temp file, copy the data across, then close our side. */
int outFd = mustMkstemp(tempFileName);
cpFile(remoteFd, outFd);
mustCloseFd(&outFd);
}
void edwSubmit(char *submitUrl, char *email) /* edwSubmit - Submit URL with validated.txt to warehouse. */ { /* Parse out url a little into submitDir and submitFile */ char *lastSlash = strrchr(submitUrl, '/'); if (lastSlash == NULL) errAbort("%s is not a valid URL - it has no '/' in it.", submitUrl); char *submitFile = lastSlash+1; int submitDirSize = submitFile - submitUrl; char submitDir[submitDirSize+1]; memcpy(submitDir, submitUrl, submitDirSize); submitDir[submitDirSize] = 0; // Add trailing zero /* Make sure user has access. */ struct sqlConnection *conn = edwConnectReadWrite(); struct edwUser *user = edwMustGetUserFromEmail(conn, email); int userId = user->id; /* See if we are already running on same submission. If so council patience and quit. */ notOverlappingSelf(conn, submitUrl); /* Make a submit record. */ int submitId = makeNewEmptySubmitRecord(conn, submitUrl, userId); /* The next errCatch block will fill these in if all goes well. */ struct submitFileRow *sfrList = NULL, *oldList = NULL, *newList = NULL; int oldCount = 0; long long oldBytes = 0, newBytes = 0, byteCount = 0; /* Start catching errors from here and writing them in submitId. If we don't * throw we'll end up having a list of all files in the submit in sfrList. */ struct errCatch *errCatch = errCatchNew(); char query[1024]; if (errCatchStart(errCatch)) { /* Make sure they got a bit of space, enough for a reasonable submit file. * We do this here just because we can make error message more informative. */ long long diskFreeSpace = freeSpaceOnFileSystem(edwRootDir); if (diskFreeSpace < 4*1024*1024) errAbort("No space left in warehouse!"); /* Open remote submission file. This is most likely where we will fail. */ int hostId=0, submitDirId = 0; long long startUploadTime = edwNow(); int remoteFd = edwOpenAndRecordInDir(conn, submitDir, submitFile, submitUrl, &hostId, &submitDirId); /* Copy to local temp file. 
*/ char tempSubmitFile[PATH_LEN]; fetchFdToTempFile(remoteFd, tempSubmitFile); mustCloseFd(&remoteFd); long long endUploadTime = edwNow(); /* Calculate MD5 sum, and see if we already have such a file. */ char *md5 = md5HexForFile(tempSubmitFile); int fileId = findFileGivenMd5AndSubmitDir(conn, md5, submitDirId); /* If we already have it, then delete temp file, otherwise put file in file table. */ char submitLocalPath[PATH_LEN]; if (fileId != 0) { remove(tempSubmitFile); char submitRelativePath[PATH_LEN]; sqlSafef(query, sizeof(query), "select edwFileName from edwFile where id=%d", fileId); sqlNeedQuickQuery(conn, query, submitRelativePath, sizeof(submitRelativePath)); safef(submitLocalPath, sizeof(submitLocalPath), "%s%s", edwRootDir, submitRelativePath); } else { /* Looks like it's the first time we've seen this submission file, so * save the file itself. We'll get to the records inside the file in a bit. */ fileId = makeNewEmptyFileRecord(conn, submitId, submitDirId, submitFile, 0); /* Get file/path names for submission file inside warehouse. */ char edwFile[PATH_LEN]; edwMakeFileNameAndPath(fileId, submitFile, edwFile, submitLocalPath); /* Move file to final resting place and get update time and size from local file system. */ mustRename(tempSubmitFile, submitLocalPath); time_t updateTime = fileModTime(submitLocalPath); off_t size = fileSize(submitLocalPath); /* Update file table which now should be complete including updateTime. */ sqlSafef(query, sizeof(query), "update edwFile set " " updateTime=%lld, size=%lld, md5='%s', edwFileName='%s'," " startUploadTime=%lld, endUploadTime=%lld" " where id=%u\n", (long long)updateTime, (long long)size, md5, edwFile, startUploadTime, endUploadTime, fileId); sqlUpdate(conn, query); } /* By now there is a submit file on the local file system. We parse it out. */ edwParseSubmitFile(conn, submitLocalPath, submitUrl, &sfrList); /* Save our progress so far to submit table. 
*/ sqlSafef(query, sizeof(query), "update edwSubmit" " set submitFileId=%lld, submitDirId=%lld, fileCount=%d where id=%d", (long long)fileId, (long long)submitDirId, slCount(sfrList), submitId); sqlUpdate(conn, query); /* Weed out files we already have. */ struct submitFileRow *sfr, *sfrNext; for (sfr = sfrList; sfr != NULL; sfr = sfrNext) { sfrNext = sfr->next; struct edwFile *bf = sfr->file; long long fileId; if ((fileId = edwGotFile(conn, submitDir, bf->submitFileName, bf->md5, bf->size)) >= 0) { ++oldCount; oldBytes += bf->size; sfr->md5MatchFileId = fileId; slAddHead(&oldList, sfr); } else slAddHead(&newList, sfr); byteCount += bf->size; } sfrList = NULL; slReverse(&newList); slReverse(&oldList); /* Update database with oldFile count. */ sqlSafef(query, sizeof(query), "update edwSubmit set oldFiles=%d,oldBytes=%lld,byteCount=%lld where id=%u", oldCount, oldBytes, byteCount, submitId); sqlUpdate(conn, query); /* Deal with old files. This may throw an error. We do it before downloading new * files since we want to fail fast if we are going to fail. */ int updateCount = handleOldFileTags(conn, oldList, doUpdate); sqlSafef(query, sizeof(query), "update edwSubmit set metaChangeCount=%d where id=%u", updateCount, submitId); sqlUpdate(conn, query); } errCatchEnd(errCatch); if (errCatch->gotError) { handleSubmitError(conn, submitId, errCatch->message->string); /* The handleSubmitError will keep on throwing. */ } errCatchFree(&errCatch); /* Go through list attempting to load the files if we don't already have them. */ struct submitFileRow *sfr; for (sfr = newList; sfr != NULL; sfr = sfr->next) { if (edwSubmitShouldStop(conn, submitId)) break; struct edwFile *bf = sfr->file; int submitUrlSize = strlen(submitDir) + strlen(bf->submitFileName) + 1; char submitUrl[submitUrlSize]; safef(submitUrl, submitUrlSize, "%s%s", submitDir, bf->submitFileName); if (edwGotFile(conn, submitDir, bf->submitFileName, bf->md5, bf->size)<0) { /* We can't get a ID for this file. 
There's two possible reasons - * either somebody is in the middle of fetching it or nobody's started. * If somebody is in the middle of fetching it, assume they died * if they took more than an hour, and start up another fetch. * So here we fetch unless somebody else is fetching recently. */ if (edwGettingFile(conn, submitDir, bf->submitFileName) < 0) { verbose(1, "Fetching %s\n", bf->submitFileName); getSubmittedFile(conn, bf, submitDir, submitUrl, submitId); newBytes += bf->size; sqlSafef(query, sizeof(query), "update edwSubmit set newFiles=newFiles+1,newBytes=%lld where id=%d", newBytes, submitId); sqlUpdate(conn, query); } } else { verbose(2, "Already got %s\n", bf->submitFileName); sqlSafef(query, sizeof(query), "update edwSubmit set oldFiles=oldFiles+1 where id=%d", submitId); sqlUpdate(conn, query); } if (sfr->replacesFile != 0) { /* What happens when the replacement doesn't validate? */ verbose(2, "Replacing %s with %s\n", sfr->replaces, bf->submitFileName); sqlSafef(query, sizeof(query), "update edwFile set replacedBy=%u, deprecated='%s' where id=%u", bf->id, sfr->replaceReason, sfr->replacesFile); sqlUpdate(conn, query); } } /* If we made it here, update submit endUploadTime */ sqlSafef(query, sizeof(query), "update edwSubmit set endUploadTime=%lld where id=%d", edwNow(), submitId); sqlUpdate(conn, query); /* Get a real submission record and then set things up so mail user when all done. */ struct edwSubmit *submit = edwSubmitFromId(conn, submitId); sqlDisconnect(&conn); // We'll be waiting a while so free connection waitForValidationAndSendEmail(submit, email); }
int edwFileFetch(struct sqlConnection *conn, struct edwFile *ef, int fd, char *submitFileName, unsigned submitId, unsigned submitDirId, unsigned hostId) /* Fetch file and if successful update a bunch of the fields in ef with the result. * Returns fileId. */ { ef->id = makeNewEmptyFileRecord(conn, submitId, submitDirId, ef->submitFileName, ef->size); /* Update edwSubmit with file in transit info */ char query[256]; sqlSafef(query, sizeof(query), "update edwSubmit set fileIdInTransit=%lld where id=%u", (long long)ef->id, submitId); sqlUpdate(conn, query); sqlSafef(query, sizeof(query), "select paraFetchStreams from edwHost where id=%u", hostId); int paraFetchStreams = sqlQuickNum(conn, query); struct paraFetchInterruptContext interruptContext = {.conn=conn, .submitId=submitId}; /* Wrap getting the file, the actual data transfer, with an error catcher that * will remove partly uploaded files. Perhaps some day we'll attempt to rescue * ones that are just truncated by downloading the rest, but not now. */ struct errCatch *errCatch = errCatchNew(); char tempName[PATH_LEN] = ""; char edwFile[PATH_LEN] = "", edwPath[PATH_LEN]; if (errCatchStart(errCatch)) { /* Now make temp file name and open temp file in an atomic operation */ char *tempDir = edwTempDir(); safef(tempName, PATH_LEN, "%sedwSubmitXXXXXX", tempDir); int localFd = mustMkstemp(tempName); /* Update file name in database with temp file name so web app can track us. */ char query[PATH_LEN+128]; sqlSafef(query, sizeof(query), "update edwFile set edwFileName='%s' where id=%lld", tempName + strlen(edwRootDir), (long long)ef->id); sqlUpdate(conn, query); /* Do actual upload tracking how long it takes. 
*/ ef->startUploadTime = edwNow(); mustCloseFd(&localFd); if (!parallelFetchInterruptable(submitFileName, tempName, paraFetchStreams, 4, FALSE, FALSE, paraFetchInterruptFunction, &interruptContext)) { if (interruptContext.isInterrupted) errAbort("Submission stopped by user."); else errAbort("parallel fetch of %s failed", submitFileName); } ef->endUploadTime = edwNow(); /* Rename file both in file system and (via ef) database. */ edwMakeFileNameAndPath(ef->id, submitFileName, edwFile, edwPath); mustRename(tempName, edwPath); if (endsWith(edwPath, ".gz") && !encode3IsGzipped(edwPath)) errAbort("%s has .gz suffix, but is not gzipped", submitFileName); ef->edwFileName = cloneString(edwFile); } errCatchEnd(errCatch); if (errCatch->gotError) { /* Attempt to remove any partial file. */ if (tempName[0] != 0) { verbose(1, "Removing partial %s\n", tempName); parallelFetchRemovePartial(tempName); remove(tempName); } handleSubmitError(conn, submitId, errCatch->message->string); // Throws further assert(FALSE); // We never get here } errCatchFree(&errCatch); /* Now we got the file. We'll go ahead and save the file name and stuff. */ sqlSafef(query, sizeof(query), "update edwFile set" " edwFileName='%s', startUploadTime=%lld, endUploadTime=%lld" " where id = %d" , ef->edwFileName, ef->startUploadTime, ef->endUploadTime, ef->id); sqlUpdate(conn, query); /* Wrap the validations in an error catcher that will save error to file table in database */ errCatch = errCatchNew(); boolean success = FALSE; if (errCatchStart(errCatch)) { /* Check MD5 sum here. */ unsigned char md5bin[16]; md5ForFile(edwPath, md5bin); char md5[33]; hexBinaryString(md5bin, sizeof(md5bin), md5, sizeof(md5)); if (!sameWord(md5, ef->md5)) errAbort("%s has md5 mismatch: %s != %s. File may be corrupted in upload, or file may have " "been changed since validateManifest was run. Please check that md5 of file " "before upload is really %s. 
If it is then try submitting again, otherwise " "rerun validateManifest and then try submitting again. \n", ef->submitFileName, ef->md5, md5, ef->md5); /* Finish updating a bunch more of edwFile record. Note there is a requirement in * the validFile section that ef->updateTime be updated last. A nonzero ef->updateTime * is used as a sign of record complete. */ struct dyString *dy = dyStringNew(0); /* Includes tag so query may be long */ sqlDyStringPrintf(dy, "update edwFile set md5='%s',size=%lld,updateTime=%lld", md5, ef->size, ef->updateTime); dyStringAppend(dy, ", tags='"); dyStringAppend(dy, ef->tags); dyStringPrintf(dy, "' where id=%d", ef->id); sqlUpdate(conn, dy->string); dyStringFree(&dy); /* Update edwSubmit so file no longer shown as in transit */ sqlSafef(query, sizeof(query), "update edwSubmit set fileIdInTransit=0 where id=%u", submitId); sqlUpdate(conn, query); success = TRUE; } errCatchEnd(errCatch); if (errCatch->gotError) { handleFileError(conn, submitId, ef->id, errCatch->message->string); } return ef->id; }