void verifyChunkMD5(vector<File> &files) {
  try {
    while (true) {
      Chunk *c = chunksToComputeMD5.consume();
      if (files[c->parentFileIndex].matchStatus == File::Status::FAILED_TO_MATCH_REMOTE_FILE) {
        // We have already marked the file as a non-match; don't waste time reading more chunks from it
        c->log("File status == FAILED_TO_MATCH_REMOTE_FILE, skipping the MD5 compute...");
        c->clear();
        chunksSkipped.produce(c);
      } else {
        c->log("Computing MD5...");
        string computedMD5 = c->computeMD5();
        c->clear();
        if (c->expectedMD5 != computedMD5) {
          c->log("MISMATCH between expected MD5 '" + c->expectedMD5 + "' and computed MD5 '" +
                 computedMD5 + "' ... marking the file as a mismatch");
          files[c->parentFileIndex].matchStatus = File::Status::FAILED_TO_MATCH_REMOTE_FILE;
          chunksFailed.produce(c);
        } else {
          c->log("Expected and computed MD5 match!");
          chunksFinished.produce(c);
        }
      }
    }
  } catch (boost::thread_interrupted &ti) {
    return;
  }
}
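// All of the worker stages in this file communicate through
// BlockingQueue<T>::produce()/consume(). The sketch below is an illustrative
// assumption of how such a queue could be built on boost primitives, not the
// project's actual implementation. Note that condition_variable::wait() is a
// Boost interruption point, which is what lets the workers above exit via
// boost::thread_interrupted while blocked in consume().
#include <queue>
#include <boost/thread/mutex.hpp>
#include <boost/thread/condition_variable.hpp>

template <typename T>
class BlockingQueueSketch {
public:
  void produce(T item) {
    boost::mutex::scoped_lock lock(m_);
    q_.push(item);
    cv_.notify_one(); // wake one blocked consumer
  }
  T consume() {
    boost::mutex::scoped_lock lock(m_);
    while (q_.empty())
      cv_.wait(lock); // interruption point: may throw boost::thread_interrupted
    T item = q_.front();
    q_.pop();
    return item;
  }
private:
  std::queue<T> q_;
  boost::mutex m_;
  boost::condition_variable cv_;
};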
void compressChunks() {
  try {
    while (true) {
      Chunk *c = chunksToCompress.consume();
      if (c->toCompress) {
        c->log("Compressing...");
        c->compress();
        c->log("Finished compressing");
      } else {
        c->log("Not compressing");
      }
      chunksToUpload.produce(c);
      // Sleep for a tiny amount of time to make sure we yield to other threads.
      // Note: boost::this_thread::yield() is not a valid interruption point,
      //       so we have to use sleep()
      boost::this_thread::sleep(boost::posix_time::microseconds(100));
    }
  } catch (std::bad_alloc &e) {
    boost::call_once(bad_alloc_once, boost::bind(&handle_bad_alloc, e));
  } catch (boost::thread_interrupted &ti) {
    return;
  }
}
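// Chunk::compress() is opaque at this point in the file. Purely as an
// illustration of compressing an in-memory chunk buffer, here is a sketch
// using zlib's compress2() (an assumed dependency; the real Chunk::compress()
// may use a different zlib entry point or stream format):
#include <zlib.h>
#include <vector>
#include <stdexcept>

static std::vector<char> compressBuffer(const std::vector<char> &in) {
  if (in.empty())
    return std::vector<char>(); // nothing to compress
  uLongf outLen = compressBound(in.size()); // worst-case output size
  std::vector<char> out(outLen);
  int rc = compress2(reinterpret_cast<Bytef *>(&out[0]), &outLen,
                     reinterpret_cast<const Bytef *>(&in[0]),
                     in.size(), Z_DEFAULT_COMPRESSION);
  if (rc != Z_OK)
    throw std::runtime_error("compress2() failed");
  out.resize(outLen); // shrink to the actual compressed size
  return out;
}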
// There is currently the possibility of a race condition if a chunk
// upload timed out: a second upload may succeed and have the chunk marked
// as "complete", after which the first request makes its way through the
// queue and marks the chunk as pending again. Since we are just about to
// close the file, we check whether any chunks are still marked as pending
// and, if so, retry them.
void check_for_complete_chunks(vector<File> &files) {
  for (int currCheckNum = 0; currCheckNum < NUM_CHUNK_CHECKS; ++currCheckNum) {
    map<string, JSON> fileDescriptions;
    while (!chunksFinished.empty()) {
      Chunk *c = chunksFinished.consume();
      // Cache file descriptions so we only have to fetch one per file,
      // not one per chunk.
      if (fileDescriptions.find(c->fileID) == fileDescriptions.end())
        fileDescriptions[c->fileID] = fileDescribe(c->fileID);
      if (!is_chunk_complete(c, fileDescriptions[c->fileID])) {
        // After the chunk was uploaded, it was cleared, removing the data
        // from the buffer. We need to reload it if we're going to upload again.
        chunksToRead.produce(c);
      }
    }
    // All of the chunks were marked as complete, so we can exit and should
    // be reasonably safe to close the file.
    if (chunksToRead.size() == 0)
      return;
    // Set the totalChunks variable to the number of chunks we're going to
    // retry now plus the number of chunks in the failed queue. The monitor
    // thread will be busy until the size of chunksFinished + chunksFailed
    // equals totalChunks.
    DXLOG(logINFO) << "Retrying " << chunksToRead.size() << " chunks that did not complete.";
    totalChunks = chunksToRead.size() + chunksFailed.size();
    // Read, compress, and upload the chunks which weren't marked as complete.
    createWorkerThreads(files);
    boost::thread monitorThread(monitor);
    monitorThread.join();
    interruptWorkerThreads();
    joinWorkerThreads();
  }

  // We have tried to upload incomplete chunks NUM_CHUNK_CHECKS times!
  // Check whether any chunks are still not complete and, if so, print a warning.
  map<string, JSON> fileDescriptions;
  while (!chunksFinished.empty()) {
    Chunk *c = chunksFinished.consume();
    // Cache file descriptions so we only have to fetch one per file,
    // not one per chunk.
    if (fileDescriptions.find(c->fileID) == fileDescriptions.end())
      fileDescriptions[c->fileID] = fileDescribe(c->fileID);
    if (!is_chunk_complete(c, fileDescriptions[c->fileID])) {
      cerr << "Chunk " << c->index << " of file " << c->fileID
           << " did not complete. This file will not be accessible. Please try to upload this file again." << endl;
    }
  }
}
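// is_chunk_complete() is assumed to inspect the chunk's part entry in the
// cached /file-xxxx/describe output. A sketch under that assumption follows;
// the "parts"/"state"/"complete" field names are illustrative, not confirmed
// against the API:
static bool is_chunk_complete_sketch(Chunk *c, JSON &fileDescription) {
  const string partIndex = boost::lexical_cast<string>(c->index);
  return fileDescription.has("parts") &&
         fileDescription["parts"].has(partIndex) &&
         fileDescription["parts"][partIndex]["state"].get<string>() == "complete";
}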
void readChunks(const vector<File> &files) {
  try {
    while (true) {
      Chunk *c = chunksToRead.consume();
      if (files[c->parentFileIndex].matchStatus == File::Status::FAILED_TO_MATCH_REMOTE_FILE) {
        // We have already marked the file as a non-match; don't waste time reading more chunks from it
        c->log("File status == FAILED_TO_MATCH_REMOTE_FILE, skipping the read...");
        chunksSkipped.produce(c);
      } else {
        c->log("Reading...");
        c->read();
        c->log("Finished reading");
        chunksToComputeMD5.produce(c);
      }
    }
  } catch (boost::thread_interrupted &ti) {
    return;
  }
}
unsigned int File::createChunks(BlockingQueue<Chunk *> &queue) {
  using namespace dx;
  // For creating chunks:
  //  1) We sort the part IDs in increasing order (by pushing them all into a std::map after converting to int)
  //  2) We start creating file chunks
  if (matchStatus == Status::FAILED_TO_MATCH_REMOTE_FILE) {
    return 0; // we have already marked the file as a non-match, no need to create chunks
  }
  // We do this in two passes: first create all the "keys" in the map, then,
  // in a second pass, add the start/end offsets for each chunk.
  map<int, JSON> chunkInfo;
  // Note: We do not validate the "parts" JSON structure here, since that has already been done in init()
  for (JSON::object_iterator it = parts.object_begin(); it != parts.object_end(); ++it) {
    chunkInfo[boost::lexical_cast<int>(it->first)] =
      JSON::parse("{\"md5\": \"" + it->second["md5"].get<string>() + "\"}");
  }
  // Second pass: add the start and end location for each chunk in the local file
  int64_t start = 0;
  for (map<int, JSON>::iterator it = chunkInfo.begin(); it != chunkInfo.end(); ++it) {
    it->second["start"] = start;
    start = it->second["end"] = (start + parts[boost::lexical_cast<string>(it->first)]["size"].get<int64_t>());
  }
  // TODO: Sanity check that this works for an empty file as well
  LOG << "Creating chunks:" << endl;
  fs::path p(localFile);
  unsigned int actualChunksCreated = 0;
  for (map<int, JSON>::iterator it = chunkInfo.begin(); it != chunkInfo.end(); ++it) {
    Chunk *c = new Chunk(localFile, it->second["md5"].get<string>(),
                         it->second["start"].get<int64_t>(), it->second["end"].get<int64_t>(), fileIndex);
    c->log("created");
    queue.produce(c);
    actualChunksCreated++;
  }
  return actualChunksCreated;
}
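// Worked example of the two passes above (md5 values abbreviated): for a
// remote file with
//   parts = {"1": {"md5": "a1...", "size": 100},
//            "2": {"md5": "b2...", "size": 40}}
// the first pass creates chunkInfo = {1: {"md5": "a1..."}, 2: {"md5": "b2..."}}
// (the std::map orders the integer keys, which fixes the part order), and the
// second pass fills in the byte ranges:
//   chunkInfo[1] = {"md5": "a1...", "start": 0,   "end": 100}
//   chunkInfo[2] = {"md5": "b2...", "start": 100, "end": 140}
// so each Chunk covers the half-open range [start, end) of the local file.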
void uploadChunks(vector<File> &files) {
  try {
    while (true) {
      Chunk *c = chunksToUpload.consume();
      c->log("Uploading...");
      bool uploaded = false;
      try {
        c->upload(opt);
        uploaded = true;
      } catch (runtime_error &e) {
        ostringstream msg;
        msg << "Upload failed: " << e.what();
        c->log(msg.str(), logERROR);
      }
      if (uploaded) {
        c->log("Upload succeeded!");
        int64_t size_of_chunk = c->data.size(); // this can differ from (c->end - c->start) because of compression
        c->clear();
        chunksFinished.produce(c);
        // Update the number of bytes uploaded in the parent file object
        boost::mutex::scoped_lock boLock(bytesUploadedMutex);
        files[c->parentFileIndex].bytesUploaded += (c->end - c->start);
        files[c->parentFileIndex].atleastOnePartDone = true;
        bytesUploadedSinceStart += size_of_chunk;
        boLock.unlock();
      } else if (c->triesLeft > 0) {
        int numTry = NUMTRIES_g - c->triesLeft + 1; // find out which attempt this is
        int timeout = (numTry > 6) ? 256 : 4 << numTry; // timeout is always in the range [8, 256] seconds
        c->log("Will retry reading and uploading this chunk in " +
               boost::lexical_cast<string>(timeout) + " seconds", logWARNING);
        if (!opt.noRoundRobinDNS) {
          boost::mutex::scoped_lock forceRefreshLock(forceRefreshDNSMutex);
          c->log("Setting forceRefreshDNS = true in main.cpp:uploadChunks()");
          forceRefreshDNS = true; // refresh the DNS list in the next call to getRandomIP()
        }
        --(c->triesLeft);
        c->clear(); // we will read & compress the data again
        boost::this_thread::sleep(boost::posix_time::milliseconds(timeout * 1000));
        // We push the chunk to retry onto "chunksToRead", not "chunksToUpload":
        // chunksToUpload is a bounded queue, so chunksToUpload.produce() can
        // block, which could give rise to a deadlock.
        chunksToRead.produce(c);
      } else {
        c->log("Not retrying", logERROR);
        // TODO: Should we print this on stderr or via DXLOG (verbose only)?
        cerr << "\nFailed to upload Chunk [" << c->start << " - " << c->end << "] for local file ("
             << files[c->parentFileIndex].localFile << "). APIServer response for last try: '"
             << c->respData << "'" << endl;
        c->clear();
        chunksFailed.produce(c);
      }
      // Sleep for a tiny amount of time to make sure we yield to other threads.
      // Note: boost::this_thread::yield() is not a valid interruption point,
      //       so we have to use sleep()
      boost::this_thread::sleep(boost::posix_time::microseconds(100));
    }
  } catch (std::bad_alloc &e) {
    boost::call_once(bad_alloc_once, boost::bind(&handle_bad_alloc, e));
  } catch (boost::thread_interrupted &ti) {
    return;
  }
}
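// The retry back-off in uploadChunks() grows exponentially and is capped:
// for attempt numbers 1, 2, 3, 4, 5, 6, 7+ the computed timeouts are
// 8, 16, 32, 64, 128, 256, 256 seconds. A self-contained restatement of the
// same policy (illustrative helper, not part of the original source):
static int retryTimeoutSeconds(int numTry) {
  // 4 << 1 == 8, 4 << 2 == 16, ..., 4 << 6 == 256; capped at 256 afterwards
  return (numTry > 6) ? 256 : (4 << numTry);
}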