// There is currently the possibility of a race condition if a chunk // upload timed-out. It's possible that a second upload succeeds, // has the chunk marked as "complete" and then the first request makes // its way through the queue and marks the chunk as pending again. // Since we are just about to close the file, we'll check to see if any // chunks are marked as pending, and if so, we'll retry them. void check_for_complete_chunks(vector<File> &files) { for (int currCheckNum=0; currCheckNum < NUM_CHUNK_CHECKS; ++currCheckNum){ map<string, JSON> fileDescriptions; while (!chunksFinished.empty()) { Chunk *c = chunksFinished.consume(); // Cache file descriptions so we only have to do once per file, // not once per chunk. if (fileDescriptions.find(c->fileID) == fileDescriptions.end()) fileDescriptions[c->fileID] = fileDescribe(c->fileID); if (!is_chunk_complete(c, fileDescriptions[c->fileID])) { // After the chunk was uploaded, it was cleared, removing the data // from the buffer. We need to reload if we're going to upload again. chunksToRead.produce(c); } } // All of the chunks were marked as complete, so let's exit and we // should be safeish to close the file. if(chunksToRead.size() == 0) return; // Set the totalChunks variable to the # of chunks we're going // to retry now plus the number of chunks in the failed queue. The monitor // thread will be busy until the size of chunksFinished + chunksFailed // equals totalChunks. DXLOG(logINFO) << "Retrying " << chunksToRead.size() << " chunks that did not complete."; totalChunks = chunksToRead.size() + chunksFailed.size(); // Read, compress, and upload the chunks which weren't marked as complete. createWorkerThreads(files); boost::thread monitorThread(monitor); monitorThread.join(); interruptWorkerThreads(); joinWorkerThreads(); } // We have tried to upload incomplete chunks NUM_CHUNK_CHECKS times! // Check to see if there are any chunks still not complete and if so, // print warning. 
map<string, JSON> fileDescriptions; while (!chunksFinished.empty()) { Chunk *c = chunksFinished.consume(); // Cache file descriptions so we only have to do once per file, // not once per chunk. if (fileDescriptions.find(c->fileID) == fileDescriptions.end()) fileDescriptions[c->fileID] = fileDescribe(c->fileID); if (!is_chunk_complete(c, fileDescriptions[c->fileID])) { cerr << "Chunk " << c->index << " of file " << c->fileID << " did not complete. This file will not be accessible. PLease try to upload this file again." << endl; } } }
void compressChunks() { try { while (true) { Chunk * c = chunksToCompress.consume(); if (c->toCompress) { c->log("Compressing..."); c->compress(); c->log("Finished compressing"); } else { c->log("Not compressing"); } chunksToUpload.produce(c); // Sleep for tiny amount of time, to make sure we yield to other threads. // Note: boost::this_thread::yield() is not a valid interruption point, // so we have to use sleep() boost::this_thread::sleep(boost::posix_time::microseconds(100)); } } catch(std::bad_alloc &e) { boost::call_once(bad_alloc_once, boost::bind(&handle_bad_alloc, e)); } catch (boost::thread_interrupted &ti) { return; } }
// Worker loop for an MD5-verification thread.
//
// Repeatedly takes a chunk from chunksToComputeMD5 and routes it to one of
// three queues:
//   - chunksSkipped:  the parent file is already marked
//                     FAILED_TO_MATCH_REMOTE_FILE, so no MD5 is computed;
//   - chunksFailed:   the computed MD5 differs from c->expectedMD5 (the parent
//                     file is then marked FAILED_TO_MATCH_REMOTE_FILE);
//   - chunksFinished: the computed MD5 equals c->expectedMD5.
// The chunk's buffer is cleared in every case.
//
// files: the files being processed, indexed by Chunk::parentFileIndex; a
//        file's matchStatus is updated on an MD5 mismatch.
//
// The loop ends when the thread is interrupted. Allocation failures are
// handed to handle_bad_alloc via boost::call_once, matching the other worker
// loops in this file (compressChunks, uploadChunks), instead of escaping the
// thread function.
void verifyChunkMD5(vector<File> &files) {
  try {
    while (true) {
      Chunk * c = chunksToComputeMD5.consume();
      if (files[c->parentFileIndex].matchStatus == File::Status::FAILED_TO_MATCH_REMOTE_FILE) {
        // We have already marked file as a non-match, don't waste time reading more chunks from it
        c->log("File status == FAILED_TO_MATCH_REMOTE_FILE, Skipping the MD5 compute...");
        c->clear();
        chunksSkipped.produce(c);
      } else {
        c->log("Computing MD5...");
        string computedMD5 = c->computeMD5();
        // The digest is all that is needed from here on; drop the chunk data.
        c->clear();
        if (c->expectedMD5 != computedMD5) {
          c->log("MISMATCH between expected MD5 '" + c->expectedMD5 + "', and computed MD5 '" + computedMD5 + "' ... marking the file as Mismatch");
          files[c->parentFileIndex].matchStatus = File::Status::FAILED_TO_MATCH_REMOTE_FILE;
          chunksFailed.produce(c);
        } else {
          c->log("Expected and computed MD5 match!");
          chunksFinished.produce(c);
        }
      }
    }
  } catch (std::bad_alloc &e) {
    boost::call_once(bad_alloc_once, boost::bind(&handle_bad_alloc, e));
  } catch (boost::thread_interrupted &ti) {
    // Interruption is the normal way this worker is asked to stop.
    return;
  }
}
void readChunks(const vector<File> &files) { try { while (true) { Chunk * c = chunksToRead.consume(); if (files[c->parentFileIndex].matchStatus == File::Status::FAILED_TO_MATCH_REMOTE_FILE) { // We have already marked file as a non-match, don't waste time reading more chunks from it c->log("File status == FAILED_TO_MATCH_REMOTE_FILE, Skipping the read..."); chunksSkipped.produce(c); } else { c->log("Reading..."); c->read(); c->log("Finished reading"); chunksToComputeMD5.produce(c); } } } catch (boost::thread_interrupted &ti) { return; } }
// Entry point of the Upload Agent.
//
// Flow, as implemented below:
//   1. Parse command-line options; handle the --env / --version / --help
//      short-circuits.
//   2. Configure the API server settings, validate options, check for
//      updates, and (unless resuming is disabled) reject duplicate files.
//   3. Build a File object per input, split each into chunks queued on
//      chunksToRead, and create the destination folders.
//   4. Start the worker threads, the monitor thread and (optionally) the
//      upload-progress thread; wait for the monitor to finish, then stop and
//      join the others.
//   5. Mark files with failed chunks, close the remaining files, wait for
//      them to close, optionally run the import apps, and print one line per
//      file on stdout (file ID, plus job ID when import apps are run).
//
// Return value:
//   0  success, or after --env / --version / --help;
//   1  argument-parsing error, no input files given, a std::exception during
//      setup or upload, or at least one file/job reported as "failed";
//   3  checkForUpdates() threw a runtime_error.
int main(int argc, char * argv[]) {
  try {
    // Note: Verbose mode logging is enabled (if requested) by options parse()
    opt.parse(argc, argv);
  } catch (exception &e) {
    cerr << "Error processing arguments: " << e.what() << endl;
    opt.printHelp(argv[0]);
    return 1;
  }

  if (opt.env()) {
    opt.setApiserverDxConfig();  // needed for 'ua --env' to report project name
    printEnvironmentInfo();
    return 0;
  }

  if (opt.version()) {
    cout << "Upload Agent Version: " << UAVERSION;
#if OLD_KERNEL_SUPPORT
    cout << " (old-kernel-support)";
#endif
    // BOOST_VERSION is decoded as major = V / 100000, minor = (V / 100) % 1000,
    // patch = V % 100.
    cout << endl
         << "git version: " << DXTOOLKIT_GITVERSION << endl
         << "libboost version: " << (BOOST_VERSION / 100000) << "." << ((BOOST_VERSION / 100) % 1000) << "." << (BOOST_VERSION % 100) << endl
         << "libcurl version: " << LIBCURL_VERSION_MAJOR << "." << LIBCURL_VERSION_MINOR << "." << LIBCURL_VERSION_PATCH << endl;
    return 0;
  } else if (opt.help() || opt.files.empty()) {
    // Help is printed both on request and when no files were given; only the
    // explicit request counts as success.
    opt.printHelp(argv[0]);
    return (opt.help()) ? 0 : 1;
  }

  setUserAgentString(); // also sets dx::config::USER_AGENT_STRING()
  DXLOG(logINFO) << "DNAnexus Upload Agent " << UAVERSION << " (git version: " << DXTOOLKIT_GITVERSION << ")";
  DXLOG(logINFO) << "Upload agent's User Agent string: '" << userAgentString << "'";
  DXLOG(logINFO) << "dxcpp's User Agent string: '" << dx::config::USER_AGENT_STRING() << "'";
  DXLOG(logINFO) << opt;

  try {
    opt.setApiserverDxConfig();
    opt.validate();

    /*
     * Check for updates, and terminate execution if necessary. This also
     * has the side effect of verifying that we can connect to the API
     * server, and that the authentication token is valid.
     */
    try {
      checkForUpdates();
    } catch (runtime_error &e) {
      cerr << endl << e.what() << endl;
      return 3;
    }
    if (!opt.doNotResume) {
      disallowDuplicateFiles(opt.files, opt.projects);
    }
  } catch (exception &e) {
    cerr << endl << "ERROR: " << e.what() << endl;
    return 1;
  }

  const bool anyImportAppToBeCalled = (opt.reads || opt.pairedReads || opt.mappings || opt.variants);

  // Bound the compress and upload queues by their respective thread counts.
  chunksToCompress.setCapacity(opt.compressThreads);
  chunksToUpload.setCapacity(opt.uploadThreads);
  int exitCode = 0;
  try {
    curlInit(); // for curl requests to be made by upload chunk request

    NUMTRIES_g = opt.tries;

    vector<File> files;

    for (unsigned int i = 0; i < opt.files.size(); ++i) {
      DXLOG(logINFO) << "Getting MIME type for local file " << opt.files[i] << "...";
      string mimeType = getMimeType(opt.files[i]);
      DXLOG(logINFO) << "MIME type for local file " << opt.files[i] << " is '" << mimeType << "'.";
      bool toCompress;
      if (!opt.doNotCompress) {
        bool is_compressed = isCompressed(mimeType);
        toCompress = !is_compressed;
        if (is_compressed)
          DXLOG(logINFO) << "File " << opt.files[i] << " is already compressed, so won't try to compress it any further.";
        else
          DXLOG(logINFO) << "File " << opt.files[i] << " is not compressed, will compress it before uploading.";
      } else {
        toCompress = false;
      }
      // A file that will be compressed is uploaded with the gzip MIME type.
      if (toCompress) {
        mimeType = "application/x-gzip";
      }
      // opt.projects, opt.folders and opt.names are indexed in parallel with
      // opt.files; files[i] is the File just pushed.
      files.push_back(File(opt.files[i], opt.projects[i], opt.folders[i], opt.names[i], toCompress, !opt.doNotResume, mimeType, opt.chunkSize, i));
      totalChunks += files[i].createChunks(chunksToRead, opt.tries);
      cerr << endl;
    }

    if (opt.waitOnClose) {
      for (unsigned int i = 0; i < files.size(); ++i) {
        files[i].waitOnClose = true;
      }
    }

    // Create folders all at once (instead of one by one, above, where we
    // initialize the File objects).
    createFolders(opt.projects, opt.folders);

    // Take this point as the starting time for program operation
    // (to calculate average transfer speed)
    startTime = std::time(0);

    DXLOG(logINFO) << "Created " << totalChunks << " chunks.";

    createWorkerThreads(files);

    DXLOG(logINFO) << "Creating monitor thread..";
    boost::thread monitorThread(monitor);

    boost::thread uploadProgressThread;
    if (opt.progress) {
      DXLOG(logINFO) << "Creating Upload Progress thread..";
      uploadProgressThread = boost::thread(uploadProgress, boost::ref(files));
    }

    DXLOG(logINFO) << "Joining monitor thread...";
    monitorThread.join();
    DXLOG(logINFO) << "Monitor thread finished.";

    if (opt.progress) {
      DXLOG(logINFO) << "Joining Upload Progress thread..";
      // Clear the flag and interrupt the thread before joining it.
      keepShowingUploadProgress = false;
      uploadProgressThread.interrupt();
      uploadProgressThread.join();
      DXLOG(logINFO) << "Upload Progress thread finished.";
    }

    // The worker loops run forever; they are stopped by interruption only
    // after the monitor thread has finished.
    interruptWorkerThreads();
    joinWorkerThreads();

    // Every chunk left on the failed queue marks its whole file as failed.
    while (!chunksFailed.empty()) {
      Chunk * c = chunksFailed.consume();
      c->log("Chunk failed", logERROR);
      markFileAsFailed(files, c->fileID);
    }
    if (opt.verbose) {
      cerr << endl;
    }
    for (unsigned int i = 0; i < files.size(); ++i) {
      if (files[i].failed) {
        cerr << "File \""<< files[i].localFile << "\" could not be uploaded." << endl;
      } else {
        cerr << "File \"" << files[i].localFile << "\" was uploaded successfully. Closing..." << endl;
        if (files[i].isRemoteFileOpen) {
          files[i].close();
        }
      }
      // The literal "failed" replaces the file ID so that it is what gets
      // printed on stdout and checked for the exit code below.
      if (files[i].failed)
        files[i].fileID = "failed";
    }

    DXLOG(logINFO) << "Waiting for files to be closed...";
    boost::thread waitOnCloseThread(waitOnClose, boost::ref(files));
    DXLOG(logINFO) << "Joining wait-on-close thread...";
    waitOnCloseThread.join();
    DXLOG(logINFO) << "Wait-on-close thread finished.";
    if (anyImportAppToBeCalled) {
      runImportApps(opt, files);
    }
    // One stdout line per file: "<fileID>" or "<fileID>\t<jobID>".
    for (unsigned i = 0; i < files.size(); ++i) {
      cout << files[i].fileID;
      if (files[i].fileID == "failed")
        exitCode = 1;
      if (anyImportAppToBeCalled) {
        if (files[i].jobID == "failed")
          exitCode = 1;
        cout << "\t" << files[i].jobID;
      }
      cout << endl;
    }
    curlCleanup();

    DXLOG(logINFO) << "Exiting.";
  } catch (bad_alloc &e) {
    // NOTE(review): if handle_bad_alloc returns, control falls through to
    // `return exitCode` below without calling curlCleanup() and possibly with
    // exitCode still 0 -- confirm that handle_bad_alloc terminates the process.
    boost::call_once(bad_alloc_once, boost::bind(&handle_bad_alloc, e));
  } catch (exception &e) {
    curlCleanup();
    cerr << endl << "ERROR: " << e.what() << endl;
    return 1;
  }

  return exitCode;
}
void uploadChunks(vector<File> &files) { try { while (true) { Chunk * c = chunksToUpload.consume(); c->log("Uploading..."); bool uploaded = false; try { c->upload(opt); uploaded = true; } catch (runtime_error &e) { ostringstream msg; msg << "Upload failed: " << e.what(); c->log(msg.str(), logERROR); } if (uploaded) { c->log("Upload succeeded!"); int64_t size_of_chunk = c->data.size(); // this can be different than (c->end - c->start) because of compression c->clear(); chunksFinished.produce(c); // Update number of bytes uploaded in parent file object boost::mutex::scoped_lock boLock(bytesUploadedMutex); files[c->parentFileIndex].bytesUploaded += (c->end - c->start); files[c->parentFileIndex].atleastOnePartDone = true; bytesUploadedSinceStart += size_of_chunk; boLock.unlock(); } else if (c->triesLeft > 0) { int numTry = NUMTRIES_g - c->triesLeft + 1; // find out which try is it int timeout = (numTry > 6) ? 256 : 4 << numTry; // timeout is always between [8, 256] seconds c->log("Will retry reading and uploading this chunks in " + boost::lexical_cast<string>(timeout) + " seconds", logWARNING); if (!opt.noRoundRobinDNS) { boost::mutex::scoped_lock forceRefreshLock(forceRefreshDNSMutex); c->log("Setting forceRefreshDNS = true in main.cpp:uploadChunks()"); forceRefreshDNS = true; // refresh the DNS list in next call to getRandomIP() } --(c->triesLeft); c->clear(); // we will read & compress data again boost::this_thread::sleep(boost::posix_time::milliseconds(timeout * 1000)); // We push the chunk to retry to "chunksToRead" and not "chunksToUpload" // Since chunksToUpload queue is bounded, and chunksToUpload.produce() can block, // thus giving rise to deadlock chunksToRead.produce(c); } else { c->log("Not retrying", logERROR); // TODO: Should we print it on stderr or DXLOG (verbose only) ?? cerr << "\nFailed to upload Chunk [" << c->start << " - " << c->end << "] for local file (" << files[c->parentFileIndex].localFile << "). 
APIServer response for last try: '" << c->respData << "'" << endl; c->clear(); chunksFailed.produce(c); } // Sleep for tiny amount of time, to make sure we yield to other threads. // Note: boost::this_thread::yield() is not a valid interruption point, // so we have to use sleep() boost::this_thread::sleep(boost::posix_time::microseconds(100)); } } catch(std::bad_alloc &e) { boost::call_once(bad_alloc_once, boost::bind(&handle_bad_alloc, e)); } catch (boost::thread_interrupted &ti) { return; } }