Beispiel #1
0
// There is currently the possibility of a race condition if a chunk
// upload timed-out.  It's possible that a second upload succeeds,
// has the chunk marked as "complete" and then the first request makes
// its way through the queue and marks the chunk as pending again.
// Since we are just about to close the file, we'll check to see if any
// chunks are marked as pending, and if so, we'll retry them.
void check_for_complete_chunks(vector<File> &files) {
  for (int currCheckNum=0; currCheckNum < NUM_CHUNK_CHECKS; ++currCheckNum){
    map<string, JSON> fileDescriptions;
    while (!chunksFinished.empty()) {
      Chunk *c = chunksFinished.consume();

      // Cache file descriptions so we only have to do once per file,
      // not once per chunk.
      if (fileDescriptions.find(c->fileID) == fileDescriptions.end())
        fileDescriptions[c->fileID] = fileDescribe(c->fileID);

      if (!is_chunk_complete(c, fileDescriptions[c->fileID])) {
        // After the chunk was uploaded, it was cleared, removing the data
        // from the buffer.  We need to reload if we're going to upload again.
        chunksToRead.produce(c);
      }
    }
    // All of the chunks were marked as complete, so let's exit and we
    // should be safeish to close the file.
    if(chunksToRead.size() == 0)
      return;

    // Set the totalChunks variable to the # of chunks we're going
    // to retry now plus the number of chunks in the failed queue.  The monitor
    // thread will be busy until the size of chunksFinished + chunksFailed
    // equals totalChunks.
    DXLOG(logINFO) << "Retrying " << chunksToRead.size() << " chunks that did not complete.";
    totalChunks = chunksToRead.size() + chunksFailed.size();
    // Read, compress, and upload the chunks which weren't marked as complete.
    createWorkerThreads(files);

    boost::thread monitorThread(monitor);
    monitorThread.join();

    interruptWorkerThreads();
    joinWorkerThreads();
  }

  // We have tried to upload incomplete chunks NUM_CHUNK_CHECKS times!
  // Check to see if there are any chunks still not complete and if so,
  // print warning.
  map<string, JSON> fileDescriptions;
  while (!chunksFinished.empty()) {
    Chunk *c = chunksFinished.consume();

    // Cache file descriptions so we only have to do once per file,
    // not once per chunk.
    if (fileDescriptions.find(c->fileID) == fileDescriptions.end())
        fileDescriptions[c->fileID] = fileDescribe(c->fileID);

    if (!is_chunk_complete(c, fileDescriptions[c->fileID])) {
        cerr << "Chunk " << c->index << " of file " << c->fileID << " did not complete.  This file will not be accessible.  PLease try to upload this file again." << endl;
    }
  }
}
Beispiel #2
0
void compressChunks() {
    try {
        while (true) {
            Chunk * c = chunksToCompress.consume();

            if (c->toCompress) {
                c->log("Compressing...");
                c->compress();
                c->log("Finished compressing");
            } else {
                c->log("Not compressing");
            }

            chunksToUpload.produce(c);

            // Sleep for tiny amount of time, to make sure we yield to other threads.
            // Note: boost::this_thread::yield() is not a valid interruption point,
            //       so we have to use sleep()
            boost::this_thread::sleep(boost::posix_time::microseconds(100));
        }
    } catch(std::bad_alloc &e) {
        boost::call_once(bad_alloc_once, boost::bind(&handle_bad_alloc, e));
    } catch (boost::thread_interrupted &ti) {
        return;
    }
}
Beispiel #3
0
void verifyChunkMD5(vector<File> &files) {
  try {
    while (true) {
      Chunk * c = chunksToComputeMD5.consume();
      if (files[c->parentFileIndex].matchStatus == File::Status::FAILED_TO_MATCH_REMOTE_FILE) {
        // We have already marked file as a non-match, don't waste time reading more chunks from it
        c->log("File status == FAILED_TO_MATCH_REMOTE_FILE, Skipping the MD5 compute...");
        c->clear();
        chunksSkipped.produce(c);
      } else {
        c->log("Computing MD5...");
        string computedMD5 = c->computeMD5();
        c->clear();
        if (c->expectedMD5 != computedMD5) {
          c->log("MISMATCH between expected MD5 '" + c->expectedMD5 + "', and computed MD5 '" + computedMD5 + "' ... marking the file as Mismatch");
          files[c->parentFileIndex].matchStatus = File::Status::FAILED_TO_MATCH_REMOTE_FILE;
          chunksFailed.produce(c);
        } else {
          c->log("Expected and computed MD5 match!");
          chunksFinished.produce(c);
        }
      }
    }
  } catch (boost::thread_interrupted &ti) {
    return;
  }
}
Beispiel #4
0
void readChunks(const vector<File> &files) {
  try {
    while (true) {
      Chunk * c = chunksToRead.consume();
      if (files[c->parentFileIndex].matchStatus == File::Status::FAILED_TO_MATCH_REMOTE_FILE) {
        // We have already marked file as a non-match, don't waste time reading more chunks from it
        c->log("File status == FAILED_TO_MATCH_REMOTE_FILE, Skipping the read...");
        chunksSkipped.produce(c);
      } else {
        c->log("Reading...");
        c->read();

        c->log("Finished reading");
        chunksToComputeMD5.produce(c);
      }
    }
  } catch (boost::thread_interrupted &ti) {
    return;
  }
}
Beispiel #5
0
// Entry point of the Upload Agent.
//
// Overall flow, as visible in this function:
//   1. Parse command-line options; handle --env, --version and --help.
//   2. Configure the API server, validate options and check for updates.
//   3. Build one File object per input path and split each into chunks
//      (queued on chunksToRead).
//   4. Start the worker threads, the monitor thread and (optionally) the
//      upload-progress thread; wait for the monitor thread to finish.
//   5. Mark files with failed chunks as failed, close the remaining files,
//      wait for them to be closed and optionally run import apps.
//   6. Print one line per file on stdout: its file ID (or "failed") and,
//      when an import app was requested, a tab followed by the job ID.
//
// Return value:
//   0  on success, and for --env, --version and --help;
//   1  on an argument parsing error, when no files were given, on any
//      std::exception, or when any file ID / job ID equals "failed";
//   3  when checkForUpdates() throws a runtime_error.
int main(int argc, char * argv[]) {
    try {
        // Note: Verbose mode logging is enabled (if requested) by options parse()
        opt.parse(argc, argv);
    } catch (exception &e) {
        cerr << "Error processing arguments: " << e.what() << endl;
        opt.printHelp(argv[0]);
        return 1;
    }

    if (opt.env()) {
        opt.setApiserverDxConfig();  // needed for 'ua --env' to report project name
        printEnvironmentInfo();
        return 0;
    }

    if (opt.version()) {
        cout << "Upload Agent Version: " << UAVERSION;
#if OLD_KERNEL_SUPPORT
        cout << " (old-kernel-support)";
#endif
        // BOOST_VERSION is decoded as major = /100000, minor = /100 % 1000,
        // patch = % 100.
        cout << endl
             << "git version: " << DXTOOLKIT_GITVERSION << endl
             << "libboost version: " << (BOOST_VERSION / 100000) << "." << ((BOOST_VERSION / 100) % 1000) << "." << (BOOST_VERSION % 100) << endl
             << "libcurl version: " << LIBCURL_VERSION_MAJOR << "." << LIBCURL_VERSION_MINOR << "." << LIBCURL_VERSION_PATCH << endl;
        return 0;
    } else if (opt.help() || opt.files.empty()) {
        // Help was requested explicitly (exit 0), or no files were given,
        // which is treated as a usage error (exit 1).
        opt.printHelp(argv[0]);
        return (opt.help()) ? 0 : 1;
    }

    setUserAgentString(); // also sets dx::config::USER_AGENT_STRING()
    DXLOG(logINFO) << "DNAnexus Upload Agent " << UAVERSION << " (git version: " << DXTOOLKIT_GITVERSION << ")";
    DXLOG(logINFO) << "Upload agent's User Agent string: '" << userAgentString << "'";
    DXLOG(logINFO) << "dxcpp's User Agent string: '" << dx::config::USER_AGENT_STRING() << "'";
    DXLOG(logINFO) << opt;

    try {
        opt.setApiserverDxConfig();
        opt.validate();

        /*
         * Check for updates, and terminate execution if necessary. This also
         * has the side effect of verifying that we can connect to the API
         * server, and that the authentication token is valid.
         */
        try {
            checkForUpdates();
        } catch (runtime_error &e) {
            cerr << endl << e.what() << endl;
            return 3;
        }
        // The duplicate-file check is only performed when resuming is allowed.
        if (!opt.doNotResume) {
            disallowDuplicateFiles(opt.files, opt.projects);
        }
    } catch (exception &e) {
        cerr << endl << "ERROR: " << e.what() << endl;
        return 1;
    }

    const bool anyImportAppToBeCalled = (opt.reads || opt.pairedReads || opt.mappings || opt.variants);

    // The compress and upload queues are bounded by the respective thread counts.
    chunksToCompress.setCapacity(opt.compressThreads);
    chunksToUpload.setCapacity(opt.uploadThreads);
    int exitCode = 0;
    try {
        curlInit(); // for curl requests to be made by upload chunk request

        NUMTRIES_g = opt.tries;

        vector<File> files;

        for (unsigned int i = 0; i < opt.files.size(); ++i) {
            DXLOG(logINFO) << "Getting MIME type for local file " << opt.files[i] << "...";
            string mimeType = getMimeType(opt.files[i]);
            DXLOG(logINFO) << "MIME type for local file " << opt.files[i] << " is '" << mimeType << "'.";
            // Compress unless compression is disabled or the MIME type says
            // the file is already compressed.
            bool toCompress;
            if (!opt.doNotCompress) {
                bool is_compressed = isCompressed(mimeType);
                toCompress = !is_compressed;
                if (is_compressed)
                    DXLOG(logINFO) << "File " << opt.files[i] << " is already compressed, so won't try to compress it any further.";
                else
                    DXLOG(logINFO) << "File " << opt.files[i] << " is not compressed, will compress it before uploading.";
            } else {
                toCompress = false;
            }
            // A file that will be compressed is uploaded with the gzip MIME type.
            if (toCompress) {
                mimeType = "application/x-gzip";
            }
            files.push_back(File(opt.files[i], opt.projects[i], opt.folders[i], opt.names[i], toCompress, !opt.doNotResume, mimeType, opt.chunkSize, i));
            totalChunks += files[i].createChunks(chunksToRead, opt.tries);
            cerr << endl;
        }

        if (opt.waitOnClose) {
            for (unsigned int i = 0; i < files.size(); ++i) {
                files[i].waitOnClose = true;
            }
        }

        // Create folders all at once (instead of one by one, above, where we
        // initialize the File objects).
        createFolders(opt.projects, opt.folders);

        // Take this point as the starting time for program operation
        // (to calculate average transfer speed)
        startTime = std::time(0);

        DXLOG(logINFO) << "Created " << totalChunks << " chunks.";

        createWorkerThreads(files);

        DXLOG(logINFO) << "Creating monitor thread..";
        boost::thread monitorThread(monitor);

        // Default-constructed thread object; only assigned a real thread
        // when progress reporting is enabled.
        boost::thread uploadProgressThread;
        if (opt.progress) {
            DXLOG(logINFO) << "Creating Upload Progress thread..";
            uploadProgressThread = boost::thread(uploadProgress, boost::ref(files));
        }

        DXLOG(logINFO) << "Joining monitor thread...";
        monitorThread.join();
        DXLOG(logINFO) << "Monitor thread finished.";

        if (opt.progress) {
            DXLOG(logINFO) << "Joining Upload Progress thread..";
            keepShowingUploadProgress = false;
            uploadProgressThread.interrupt();
            uploadProgressThread.join();
            DXLOG(logINFO) << "Upload Progress thread finished.";
        }


        // The worker threads loop forever; interrupt them and wait for
        // them to exit.
        interruptWorkerThreads();
        joinWorkerThreads();

        // Every chunk left in the failed queue marks its whole file as failed.
        while (!chunksFailed.empty()) {
            Chunk * c = chunksFailed.consume();
            c->log("Chunk failed", logERROR);
            markFileAsFailed(files, c->fileID);
        }
        if (opt.verbose) {
            cerr << endl;
        }
        for (unsigned int i = 0; i < files.size(); ++i) {
            if (files[i].failed) {
                cerr << "File \""<< files[i].localFile << "\" could not be uploaded." << endl;
            } else {
                cerr << "File \"" << files[i].localFile << "\" was uploaded successfully. Closing..." << endl;
                if (files[i].isRemoteFileOpen) {
                    files[i].close();
                }
            }
            // A failed file reports the literal string "failed" in place of
            // its file ID; the output loop below keys the exit code off it.
            // NOTE(review): this repeats the `failed` test from just above.
            if (files[i].failed)
                files[i].fileID = "failed";
        }

        DXLOG(logINFO) << "Waiting for files to be closed...";
        boost::thread waitOnCloseThread(waitOnClose, boost::ref(files));
        DXLOG(logINFO) << "Joining wait-on-close thread...";
        waitOnCloseThread.join();
        DXLOG(logINFO) << "Wait-on-close thread finished.";
        if (anyImportAppToBeCalled) {
            runImportApps(opt, files);
        }
        // Machine-readable result on stdout: one line per file, file ID
        // first, then (if an import app was requested) a tab and the job ID.
        for (unsigned i = 0; i < files.size(); ++i) {
            cout << files[i].fileID;
            if (files[i].fileID == "failed")
                exitCode = 1;
            if (anyImportAppToBeCalled) {
                if (files[i].jobID == "failed")
                    exitCode = 1;
                cout << "\t" << files[i].jobID;
            }
            cout << endl;
        }
        curlCleanup();

        DXLOG(logINFO) << "Exiting.";
    } catch (bad_alloc &e) {
        // NOTE(review): this path does not call curlCleanup() and, if the
        // handler returns, falls through to `return exitCode` below.
        // Presumably handle_bad_alloc terminates the process - confirm.
        boost::call_once(bad_alloc_once, boost::bind(&handle_bad_alloc, e));
    } catch (exception &e) {
        curlCleanup();
        cerr << endl << "ERROR: " << e.what() << endl;
        return 1;
    }

    return exitCode;
}
Beispiel #6
0
// Worker loop for the upload stage.  Each chunk taken from chunksToUpload is
// uploaded once per iteration and then routed according to the outcome:
//   - success:                 per-file and global byte counters are updated
//                              and the chunk goes to chunksFinished;
//   - failure, tries left:     the chunk is cleared, the thread sleeps for a
//                              back-off period, and the chunk is pushed back
//                              onto chunksToRead to be read again;
//   - failure, no tries left:  an error is printed on stderr and the chunk
//                              goes to chunksFailed.
// Only runtime_error thrown by Chunk::upload() counts as an upload failure.
// The loop runs until the thread is interrupted or an allocation failure is
// caught.
//
// files: the File objects the chunks belong to, indexed by
//        Chunk::parentFileIndex; their upload counters are updated here.
void uploadChunks(vector<File> &files) {
    try {
        while (true) {
            Chunk * c = chunksToUpload.consume();

            c->log("Uploading...");

            bool uploaded = false;
            try {
                c->upload(opt);
                uploaded = true;
            } catch (runtime_error &e) {
                ostringstream msg;
                msg << "Upload failed: " << e.what();
                c->log(msg.str(), logERROR);
            }

            if (uploaded) {
                c->log("Upload succeeded!");
                // Capture the size before clear(); this can be different than
                // (c->end - c->start) because of compression.
                const int64_t size_of_chunk = c->data.size();
                c->clear();
                {
                    // Update number of bytes uploaded in parent file object
                    boost::mutex::scoped_lock boLock(bytesUploadedMutex);
                    files[c->parentFileIndex].bytesUploaded += (c->end - c->start);
                    files[c->parentFileIndex].atleastOnePartDone = true;
                    bytesUploadedSinceStart += size_of_chunk;
                }
                // Publish the chunk only after the accounting above is done:
                // once it is in chunksFinished, other threads may consume it,
                // so it must not be touched here afterwards and the counters
                // must already reflect it.
                chunksFinished.produce(c);
            } else if (c->triesLeft > 0) {
                int numTry = NUMTRIES_g - c->triesLeft + 1; // find out which try is it
                int timeout = (numTry > 6) ? 256 : 4 << numTry; // timeout is always between [8, 256] seconds
                c->log("Will retry reading and uploading this chunks in " + boost::lexical_cast<string>(timeout) + " seconds", logWARNING);
                if (!opt.noRoundRobinDNS) {
                    boost::mutex::scoped_lock forceRefreshLock(forceRefreshDNSMutex);
                    c->log("Setting forceRefreshDNS = true in main.cpp:uploadChunks()");
                    forceRefreshDNS = true; // refresh the DNS list in next call to getRandomIP()
                }
                --(c->triesLeft);
                c->clear(); // we will read & compress data again
                boost::this_thread::sleep(boost::posix_time::milliseconds(timeout * 1000));
                // We push the chunk to retry to "chunksToRead" and not "chunksToUpload"
                // Since chunksToUpload queue is bounded, and chunksToUpload.produce() can block,
                // thus giving rise to deadlock
                chunksToRead.produce(c);
            } else {
                c->log("Not retrying", logERROR);
                // TODO: Should we print it on stderr or DXLOG (verbose only) ??
                cerr << "\nFailed to upload Chunk [" << c->start << " - " << c->end << "] for local file ("
                     << files[c->parentFileIndex].localFile << "). APIServer response for last try: '" << c->respData << "'" << endl;
                c->clear();
                chunksFailed.produce(c);
            }
            // Sleep for tiny amount of time, to make sure we yield to other threads.
            // Note: boost::this_thread::yield() is not a valid interruption point,
            //       so we have to use sleep()
            boost::this_thread::sleep(boost::posix_time::microseconds(100));
        }
    } catch(std::bad_alloc &e) {
        // Out of memory: run the shared handler exactly once across threads.
        boost::call_once(bad_alloc_once, boost::bind(&handle_bad_alloc, e));
    } catch (boost::thread_interrupted &ti) {
        // Interruption is the normal way this worker is asked to stop.
        return;
    }
}