void ThreadsController::deRegisterThread(int threadIndex) { GuardLock lock(controllerMutex_); auto it = threadStateMap_.find(threadIndex); WDT_CHECK(it != threadStateMap_.end()); threadStateMap_[threadIndex] = FINISHED; // Notify all the barriers for (auto barrier : barriers_) { WDT_CHECK(barrier != nullptr); barrier->deRegister(); } }
void Receiver::fixAndCloseTransferLog(bool transferSuccess) { if (!options_.enable_download_resumption) { return; } bool isInconsistentDirectory = (transferLogManager_.getResumptionStatus() == INCONSISTENT_DIRECTORY); bool isInvalidLog = (transferLogManager_.getResumptionStatus() == INVALID_LOG); if (transferSuccess && isInconsistentDirectory) { // write log header to validate directory in case of success WDT_CHECK(options_.resume_using_dir_tree); transferLogManager_.writeLogHeader(); } transferLogManager_.closeLog(); if (!transferSuccess) { return; } if (isInvalidLog) { transferLogManager_.renameBuggyLog(); } if (!options_.keep_transfer_log) { transferLogManager_.unlink(); } }
/// SEND_BLOCKS state handler: sends the next byte source from the directory
/// queue and records it in this thread's transfer history.
///
/// @return SEND_SIZE_CMD    when the total size still has to be sent
///         SEND_DONE_CMD    when the queue has no more sources
///         END              when adding the source to the history fails
///         CHECK_FOR_ABORT  when sending the source reported a local error
///         SEND_BLOCKS      otherwise, to send the next source
SenderState SenderThread::sendBlocks() {
  WTVLOG(1) << "entered SEND_BLOCKS state";
  ThreadTransferHistory &transferHistory = getTransferHistory();

  const bool sizeCmdPending =
      threadProtocolVersion_ >= Protocol::RECEIVER_PROGRESS_REPORT_VERSION &&
      !totalSizeSent_ && dirQueue_->fileDiscoveryFinished();
  if (sizeCmdPending) {
    return SEND_SIZE_CMD;
  }

  ErrorCode transferStatus;
  std::unique_ptr<ByteSource> source =
      dirQueue_->getNextSource(threadCtx_.get(), transferStatus);
  if (source == nullptr) {
    // nothing left to send; drain any heart-beats that are already buffered
    readHeartBeats();
    return SEND_DONE_CMD;
  }
  WDT_CHECK(!source->hasError());

  const TransferStats transferStats =
      sendOneByteSource(source, transferStatus);
  threadStats_ += transferStats;
  source->addTransferStats(transferStats);
  source->close();

  const bool addedToHistory = transferHistory.addSource(source);
  if (!addedToHistory) {
    // a global checkpoint was received for this thread, so continuing is
    // pointless
    WTLOG(ERROR) << "global checkpoint received. Stopping";
    threadStats_.setLocalErrorCode(CONN_ERROR);
    return END;
  }
  return (transferStats.getLocalErrorCode() != OK) ? CHECK_FOR_ABORT
                                                   : SEND_BLOCKS;
}
/// Builds a transfer report from per-source and per-thread statistics.
/// The four vector arguments are moved from and must not be relied upon by
/// the caller afterwards.
///
/// @param transferredSourceStats  stats of the sources that were transferred
/// @param failedSourceStats       stats of the sources that failed
/// @param threadStats             one stats object per thread
/// @param failedDirectories       directories that failed
/// @param totalTime               total time of the transfer
/// @param totalFileSize           expected number of effective data bytes
/// @param numDiscoveredFiles      number of files that were discovered
TransferReport::TransferReport(
    std::vector<TransferStats>& transferredSourceStats,
    std::vector<TransferStats>& failedSourceStats,
    std::vector<TransferStats>& threadStats,
    std::vector<std::string>& failedDirectories, double totalTime,
    int64_t totalFileSize, int64_t numDiscoveredFiles)
    : transferredSourceStats_(std::move(transferredSourceStats)),
      failedSourceStats_(std::move(failedSourceStats)),
      threadStats_(std::move(threadStats)),
      failedDirectories_(std::move(failedDirectories)),
      totalTime_(totalTime),
      totalFileSize_(totalFileSize) {
  // Fold all thread stats into the summary and remember whether any single
  // thread ended with OK
  bool anyThreadOk = false;
  for (const auto& perThread : threadStats_) {
    summary_ += perThread;
    if (!anyThreadOk && perThread.getErrorCode() == OK) {
      anyThreadOk = true;
    }
  }
  ErrorCode summaryErrorCode = summary_.getErrorCode();
  LOG(INFO) << "Error code summary " << errorCodeToStr(summaryErrorCode);

  // True only when neither a directory nor a source failed
  const bool nothingFailed =
      failedDirectories_.empty() && failedSourceStats_.empty();
  if (!failedDirectories_.empty()) {
    summaryErrorCode =
        getMoreInterestingError(summaryErrorCode, BYTE_SOURCE_READ_ERROR);
  }
  for (const auto& failedStat : failedSourceStats_) {
    summaryErrorCode =
        getMoreInterestingError(summaryErrorCode, failedStat.getErrorCode());
  }

  if (nothingFailed && anyThreadOk) {
    if (summaryErrorCode != OK) {
      LOG(WARNING) << "WDT successfully recovered from error "
                   << errorCodeToStr(summaryErrorCode);
    }
    summaryErrorCode = OK;
  }
  setErrorCode(summaryErrorCode);

  if (summary_.getEffectiveDataBytes() != totalFileSize_) {
    // not every byte was sent; this must coincide with a non-OK error code
    LOG(INFO) << "Could not send all the bytes " << totalFileSize_ << " "
              << summary_.getEffectiveDataBytes();
    WDT_CHECK(summaryErrorCode != OK)
        << "BUG: All threads OK yet sized based error detected";
  }

  // A file may have several failed sources; count each failed id only once
  std::set<std::string> failedIds;
  for (const auto& failedStat : failedSourceStats_) {
    failedIds.insert(failedStat.getId());
  }
  const int64_t numTransferredFiles = numDiscoveredFiles - failedIds.size();
  summary_.setNumFiles(numTransferredFiles);
}
/// Returns the funnel stored at funnelIndex.
/// Fails a WDT_CHECK if the index is out of range or the slot is null.
shared_ptr<Funnel> ThreadsController::getFunnel(const uint64_t funnelIndex) {
  const bool isExists = (funnelIndex < funnelExecutors_.size()) &&
                        (funnelExecutors_[funnelIndex] != nullptr);
  WDT_CHECK(isExists)
      << "Requesting for a funnel that doesn't exist. Request index : "
      << funnelIndex << ", num funnels " << funnelExecutors_.size();
  return funnelExecutors_[funnelIndex];
}
/// Returns the barrier stored at barrierIndex.
/// Fails a WDT_CHECK if the index is out of range or the slot is null.
shared_ptr<Barrier> ThreadsController::getBarrier(
    const uint64_t barrierIndex) {
  const bool isExists = (barrierIndex < barriers_.size()) &&
                        (barriers_[barrierIndex] != nullptr);
  WDT_CHECK(isExists)
      << "Requesting for a barrier that doesn't exist. Request index : "
      << barrierIndex << ", num barriers " << barriers_.size();
  return barriers_[barrierIndex];
}
// this can be called many times after initializeWdt() Wdt &Wdt::getWdt() { Wdt &res = getWdtInternal(); if (!res.initDone_) { LOG(ERROR) << "Called getWdt() before/without initializeWdt()"; WDT_CHECK(false) << "Must call initializeWdt() once before getWdt()"; } return res; }
void Sender::reportProgress() { WDT_CHECK(progressReportIntervalMillis_ > 0); int throughputUpdateIntervalMillis = options_.throughput_update_interval_millis; WDT_CHECK(throughputUpdateIntervalMillis >= 0); int throughputUpdateInterval = throughputUpdateIntervalMillis / progressReportIntervalMillis_; int64_t lastEffectiveBytes = 0; std::chrono::time_point<Clock> lastUpdateTime = Clock::now(); int intervalsSinceLastUpdate = 0; double currentThroughput = 0; auto waitingTime = std::chrono::milliseconds(progressReportIntervalMillis_); LOG(INFO) << "Progress reporter tracking every " << progressReportIntervalMillis_ << " ms"; while (true) { { std::unique_lock<std::mutex> lock(mutex_); conditionFinished_.wait_for(lock, waitingTime); if (transferStatus_ == THREADS_JOINED) { break; } } if (!dirQueue_->fileDiscoveryFinished()) { continue; } std::unique_ptr<TransferReport> transferReport = getTransferReport(); intervalsSinceLastUpdate++; if (intervalsSinceLastUpdate >= throughputUpdateInterval) { auto curTime = Clock::now(); int64_t curEffectiveBytes = transferReport->getSummary().getEffectiveDataBytes(); double time = durationSeconds(curTime - lastUpdateTime); currentThroughput = (curEffectiveBytes - lastEffectiveBytes) / time; lastEffectiveBytes = curEffectiveBytes; lastUpdateTime = curTime; intervalsSinceLastUpdate = 0; } transferReport->setCurrentThroughput(currentThroughput); progressReporter_->progress(transferReport); } }
void WdtThread::startThread() { if (threadPtr_) { WDT_CHECK(false) << "There is a already a thread running " << threadIndex_ << " " << getPort(); } auto state = controller_->getState(threadIndex_); // Check the state should be running here WDT_CHECK_EQ(state, RUNNING); threadPtr_.reset(new std::thread(&WdtThread::start, this)); }
/// Creates the throttler from options_ and stores it in throttler_.
/// Must only be called while throttler_ is still unset. Throttling is
/// treated as disabled when Throttler::makeThrottler returns null.
void WdtBase::configureThrottler() {
  WDT_CHECK(!throttler_);
  VLOG(1) << "Configuring throttler options";
  throttler_ = Throttler::makeThrottler(options_);
  if (!throttler_) {
    LOG(INFO) << "Throttling not enabled";
    return;
  }
  LOG(INFO) << "Enabling throttling " << *throttler_;
}
/// Negotiates the requested protocol version via Protocol::negotiateProtocol
/// and stores the result in protocolVersion_. A warning is logged when the
/// negotiated version differs from the requested one.
///
/// @param protocol  requested protocol version, must be > 0
void WdtBase::setProtocolVersion(int64_t protocol) {
  WDT_CHECK(protocol > 0) << "Protocol version can't be <= 0 " << protocol;
  const int negotiatedPv = Protocol::negotiateProtocol(protocol);
  const bool versionChanged = (negotiatedPv != protocol);
  if (versionChanged) {
    LOG(WARNING) << "Negotiated protocol version " << protocol << " -> "
                 << negotiatedPv;
  }
  protocolVersion_ = negotiatedPv;
  LOG(INFO) << "using wdt protocol version " << protocolVersion_;
}
/// Returns the condition guard stored at conditionIndex.
/// Fails a WDT_CHECK if the index is out of range or the slot is null.
shared_ptr<ConditionGuard> ThreadsController::getCondition(
    const uint64_t conditionIndex) {
  const bool isExists = (conditionIndex < conditionGuards_.size()) &&
                        (conditionGuards_[conditionIndex] != nullptr);
  WDT_CHECK(isExists)
      << "Requesting for a condition wrapper that doesn't exist."
      << " Request Index : " << conditionIndex
      << ", num condition wrappers : " << conditionGuards_.size();
  return conditionGuards_[conditionIndex];
}
char *FileByteSource::read(int64_t &size) { size = 0; if (hasError() || finished()) { return nullptr; } int64_t expectedRead = (int64_t)std::min<int64_t>(buffer_->size_, size_ - bytesRead_); int64_t toRead = expectedRead; if (alignedReadNeeded_) { toRead = ((expectedRead + kDiskBlockSize - 1) / kDiskBlockSize) * kDiskBlockSize; } // actualRead is guaranteed to be <= buffer_->size_ WDT_CHECK(toRead <= buffer_->size_) << "Attempting to read " << toRead << " while buffer size is " << buffer_->size_; START_PERF_TIMER int64_t numRead = ::pread(fd_, buffer_->data_, toRead, offset_ + bytesRead_); if (numRead < 0) { PLOG(ERROR) << "failure while reading file " << metadata_->fullPath; this->close(); transferStats_.setErrorCode(BYTE_SOURCE_READ_ERROR); return nullptr; } if (numRead == 0) { this->close(); return nullptr; } RECORD_PERF_RESULT(PerfStatReport::FILE_READ) // Can only happen in case of O_DIRECT and when // we are trying to read the last chunk of file // or we are reading in multiples of disk block size // from a sub block of the file smaller than disk block // size if (numRead > expectedRead) { WDT_CHECK(alignedReadNeeded_); numRead = expectedRead; } bytesRead_ += numRead; size = numRead; return buffer_->data_; }
void Barrier::execute() { unique_lock<mutex> lock(mutex_); WDT_CHECK(!isComplete_) << "Hitting the barrier after completion"; ++numHits_; if (checkForFinish()) { return; } while (!isComplete_) { cv_.wait(lock); } }
/// Opens the transfer log under the root directory (creating it if it does
/// not exist yet) and takes a non-blocking exclusive flock on it.
///
/// Preconditions (checked): the log is not open yet, rootDir_ is set and
/// download resumption is enabled.
///
/// @return OK on success, TRANSFER_LOG_ACQUIRE_ERROR if the file can not be
///         opened or the lock can not be acquired
ErrorCode TransferLogManager::openLog() {
  WDT_CHECK(fd_ < 0);
  WDT_CHECK(!rootDir_.empty()) << "Root directory not set";
  WDT_CHECK(options_.enable_download_resumption);

  const std::string logPath = getFullPath(kWdtLogName);
  const char *const rawPath = logPath.c_str();

  fd_ = ::open(rawPath, O_RDWR);
  if (fd_ < 0) {
    if (errno != ENOENT) {
      PLOG(ERROR) << "Could not open wdt log " << logPath;
      return TRANSFER_LOG_ACQUIRE_ERROR;
    }
    // The log is missing: try to create it (another process may race us)
    LOG(INFO) << logPath << " doesn't exist... creating...";
    fd_ = ::open(rawPath, O_CREAT | O_EXCL, 0644);
    if (fd_ >= 0) {
      // On windows/cygwin for instance the flock will silently succeed yet
      // not lock on a newly created file... workaround is to close and reopen
      ::close(fd_);
    } else {
      PLOG(WARNING) << "Could not create wdt log (maybe ok if race): "
                    << logPath;
    }
    // Whether or not the create attempt worked, open the log for read/write
    fd_ = ::open(rawPath, O_RDWR);
    if (fd_ < 0) {
      PLOG(ERROR) << "Still couldn't open wdt log after create attempt: "
                  << logPath;
      return TRANSFER_LOG_ACQUIRE_ERROR;
    }
  }

  // Take the exclusive lock without blocking
  const bool lockAcquired = (::flock(fd_, LOCK_EX | LOCK_NB) == 0);
  if (!lockAcquired) {
    PLOG(ERROR) << "Failed to acquire transfer log lock " << logPath << " "
                << fd_;
    close();
    return TRANSFER_LOG_ACQUIRE_ERROR;
  }
  LOG(INFO) << "Transfer log opened and lock acquired on " << logPath;
  return OK;
}
std::vector<TransferStats> ThreadTransferHistory::popAckedSourceStats() { std::unique_lock<std::mutex> lock(mutex_); const int64_t historySize = history_.size(); WDT_CHECK(numAcknowledged_ == historySize); // no locking needed, as this should be called after transfer has finished std::vector<TransferStats> sourceStats; while (!history_.empty()) { sourceStats.emplace_back(std::move(history_.back()->getTransferStats())); history_.pop_back(); } return sourceStats; }
/// Negotiates the protocol version found in transferRequest_ via
/// Protocol::negotiateProtocol and writes the result back into
/// transferRequest_. A warning is logged when the version changes.
void WdtBase::negotiateProtocol() {
  const int protocol = transferRequest_.protocolVersion;
  WDT_CHECK(protocol > 0) << "Protocol version can't be <= 0 " << protocol;
  const int negotiatedPv = Protocol::negotiateProtocol(protocol);
  const bool versionChanged = (negotiatedPv != protocol);
  if (versionChanged) {
    WLOG(WARNING) << "Negotiated protocol version " << protocol << " -> "
                  << negotiatedPv;
  }
  transferRequest_.protocolVersion = negotiatedPv;
  WLOG(INFO) << "using wdt protocol version "
             << transferRequest_.protocolVersion;
}
/// Repeatedly calls readOrWrite on fd_ until numBytes bytes have been
/// processed, an unrecoverable condition occurs, the transfer is aborted or
/// the timeout expires.
/// NOTE(review): F and T are presumably template parameters whose
/// declaration is outside this view - confirm.
///
/// @param readOrWrite  callable invoked as readOrWrite(fd, buffer, count);
///                     a negative return is treated as an error (errno is
///                     inspected), 0 as EOF, positive as bytes processed
/// @param tbuf         start of the buffer; advanced by the bytes done so far
/// @param numBytes     total number of bytes to process
/// @param timeoutMs    overall timeout in ms; values <= 0 disable the timeout
/// @param tryFull      if false, return after the first successful call
///                     instead of looping until numBytes are done
/// @return number of bytes processed; if nothing was processed, the negative
///         result of readOrWrite on a non-retryable error, or -1 on abort or
///         timeout
int64_t WdtSocket::ioWithAbortCheck(F readOrWrite, T tbuf, int64_t numBytes,
                                    int timeoutMs, bool tryFull) {
  WDT_CHECK(threadCtx_.getAbortChecker() != nullptr)
      << "abort checker can not be null";
  // Abort checking is only active when the configured interval is positive
  bool checkAbort = (threadCtx_.getOptions().abort_check_interval_millis > 0);
  auto startTime = Clock::now();
  int64_t doneBytes = 0;
  int retries = 0;
  while (doneBytes < numBytes) {
    const int64_t ret =
        readOrWrite(fd_, tbuf + doneBytes, numBytes - doneBytes);
    if (ret < 0) {
      // error: EINTR and EAGAIN are retried, anything else ends the io and
      // reports whatever was done so far (or the error itself if nothing was)
      if (errno != EINTR && errno != EAGAIN) {
        PLOG(ERROR) << "non-retryable error encountered during socket io "
                    << fd_ << " " << doneBytes << " " << retries;
        return (doneBytes > 0 ? doneBytes : ret);
      }
    } else if (ret == 0) {
      // eof
      WVLOG(1) << "EOF received during socket io. fd : " << fd_
               << ", finished bytes : " << doneBytes
               << ", retries : " << retries;
      return doneBytes;
    } else {
      // success
      doneBytes += ret;
      if (!tryFull) {
        // do not have to read/write entire data
        return doneBytes;
      }
    }
    // Reached after a retryable error or a partial success
    if (checkAbort && threadCtx_.getAbortChecker()->shouldAbort()) {
      WLOG(ERROR) << "transfer aborted during socket io " << fd_ << " "
                  << doneBytes << " " << retries;
      return (doneBytes > 0 ? doneBytes : -1);
    }
    if (timeoutMs > 0) {
      // The timeout covers the whole call, measured from startTime
      int duration = durationMillis(Clock::now() - startTime);
      if (duration >= timeoutMs) {
        WLOG(INFO) << "socket io timed out after " << duration
                   << " ms, retries " << retries << " fd " << fd_
                   << " doneBytes " << doneBytes;
        return (doneBytes > 0 ? doneBytes : -1);
      }
    }
    retries++;
  }
  WVLOG_IF(1, retries > 1) << "socket io for " << doneBytes << " bytes took "
                           << retries << " retries";
  return doneBytes;
}
void Sender::validateTransferStats( const std::vector<TransferStats> &transferredSourceStats, const std::vector<TransferStats> &failedSourceStats) { int64_t sourceFailedAttempts = 0; int64_t sourceDataBytes = 0; int64_t sourceEffectiveDataBytes = 0; int64_t sourceNumBlocks = 0; int64_t threadFailedAttempts = 0; int64_t threadDataBytes = 0; int64_t threadEffectiveDataBytes = 0; int64_t threadNumBlocks = 0; for (const auto &stat : transferredSourceStats) { sourceFailedAttempts += stat.getFailedAttempts(); sourceDataBytes += stat.getDataBytes(); sourceEffectiveDataBytes += stat.getEffectiveDataBytes(); sourceNumBlocks += stat.getNumBlocks(); } for (const auto &stat : failedSourceStats) { sourceFailedAttempts += stat.getFailedAttempts(); sourceDataBytes += stat.getDataBytes(); sourceEffectiveDataBytes += stat.getEffectiveDataBytes(); sourceNumBlocks += stat.getNumBlocks(); } for (const auto &senderThread : senderThreads_) { const auto &stat = senderThread->getTransferStats(); threadFailedAttempts += stat.getFailedAttempts(); threadDataBytes += stat.getDataBytes(); threadEffectiveDataBytes += stat.getEffectiveDataBytes(); threadNumBlocks += stat.getNumBlocks(); } WDT_CHECK(sourceFailedAttempts == threadFailedAttempts); WDT_CHECK(sourceDataBytes == threadDataBytes); WDT_CHECK(sourceEffectiveDataBytes == threadEffectiveDataBytes); WDT_CHECK(sourceNumBlocks == threadNumBlocks); }
// TODO: consider refactoring this to return error code void WdtSocket::readEncryptionSettingsOnce(int timeoutMs) { if (!encryptionParams_.isSet() || encryptionSettingsRead_) { return; } WDT_CHECK(!encryptionParams_.getSecret().empty()); int numRead = readInternal(buf_, 1, timeoutMs, true); if (numRead != 1) { LOG(ERROR) << "Failed to read encryption settings " << numRead << " " << port_; return; } if (buf_[0] != Protocol::ENCRYPTION_CMD) { LOG(ERROR) << "Expected to read ENCRYPTION_CMD(e), but got " << buf_[0]; readErrorCode_ = UNEXPECTED_CMD_ERROR; return; } int toRead = Protocol::kMaxEncryption - 1; // already read 1 byte for cmd numRead = readInternal(buf_, toRead, threadCtx_.getOptions().read_timeout_millis, true); if (numRead != toRead) { LOG(ERROR) << "Failed to read encryption settings " << numRead << " " << toRead << " " << port_; readErrorCode_ = SOCKET_READ_ERROR; return; } int64_t off = 0; EncryptionType encryptionType; std::string iv; if (!Protocol::decodeEncryptionSettings(buf_, off, Protocol::kMaxEncryption, encryptionType, iv)) { LOG(ERROR) << "Failed to decode encryption settings"; readErrorCode_ = PROTOCOL_ERROR; return; } if (encryptionType != encryptionParams_.getType()) { LOG(ERROR) << "Encryption type mismatch " << encryptionTypeToStr(encryptionType) << " " << encryptionTypeToStr(encryptionParams_.getType()); readErrorCode_ = PROTOCOL_ERROR; return; } if (!decryptor_.start(encryptionParams_, iv)) { readErrorCode_ = ENCRYPTION_ERROR; return; } LOG(INFO) << "Successfully read encryption settings " << port_ << " " << encryptionTypeToStr(encryptionType); encryptionSettingsRead_ = true; }
void TransferLogManager::writeEntriesToDisk() { WDT_CHECK(fd_ >= 0) << "Writer thread started before the log is opened"; LOG(INFO) << "Transfer log writer thread started"; WDT_CHECK(options_.transfer_log_write_interval_ms >= 0); auto waitingTime = std::chrono::milliseconds(options_.transfer_log_write_interval_ms); std::vector<std::string> entries; bool finished = false; while (!finished) { { std::unique_lock<std::mutex> lock(mutex_); conditionFinished_.wait_for(lock, waitingTime); finished = finished_; // make a copy of all the entries so that we do not need to hold lock // during writing entries = entries_; entries_.clear(); } std::string buffer; // write entries to disk for (const auto &entry : entries) { buffer.append(entry); } if (buffer.empty()) { // do not write when there is nothing to write continue; } int toWrite = buffer.size(); int written = ::write(fd_, buffer.c_str(), toWrite); if (written != toWrite) { PLOG(ERROR) << "Disk write error while writing transfer log " << written << " " << toWrite; return; } } LOG(INFO) << "Transfer log writer thread finished"; }
/// Handles a file resize entry of the transfer log.
///
/// In parse-only mode the decoded entry is just printed to stdout. Otherwise
/// the recorded size of the sequence-id is updated, after checking that the
/// entry is legal: the header was parsed, directory based resumption is off,
/// the sequence-id is known, the size does not shrink and (when files are
/// preallocated) does not exceed the size on disk.
///
/// @param buf   encoded entry
/// @param size  size passed to the decoder along with buf
/// @return OK on success, INVALID_LOG otherwise
ErrorCode LogParser::processFileResizeEntry(char *buf, int size) {
  if (!headerParsed_) {
    LOG(ERROR)
        << "Invalid log: File resize entry found before transfer log header";
    return INVALID_LOG;
  }
  int64_t timestamp, seqId, newSize;
  const bool decoded = encoderDecoder_.decodeFileResizeEntry(
      buf, size, timestamp, seqId, newSize);
  if (!decoded) {
    return INVALID_LOG;
  }
  if (parseOnly_) {
    std::cout << getFormattedTimestamp(timestamp) << " File resized,"
              << " seq-id " << seqId << " new file-size " << newSize
              << std::endl;
    return OK;
  }
  if (options_.resume_using_dir_tree) {
    LOG(ERROR) << "Can not have a file resize entry in directory based "
                  "resumption mode "
               << seqId << " " << newSize;
    return INVALID_LOG;
  }
  const auto infoIt = fileInfoMap_.find(seqId);
  if (infoIt == fileInfoMap_.end()) {
    LOG(ERROR) << "File resize entry for unknown sequence-id " << seqId << " "
               << newSize;
    return INVALID_LOG;
  }
  FileChunksInfo &fileChunks = infoIt->second;
  const std::string &name = fileChunks.getFileName();
  // A sequence-id known to fileInfoMap_ must also have a recorded size
  auto sizeIt = seqIdToSizeMap_.find(seqId);
  WDT_CHECK(sizeIt != seqIdToSizeMap_.end());
  if (newSize < sizeIt->second) {
    LOG(ERROR) << "File size can not reduce during resizing " << name << " "
               << seqId << " " << newSize << " " << sizeIt->second;
    return INVALID_LOG;
  }
  const bool largerThanDisk = options_.shouldPreallocateFiles() &&
                              newSize > fileChunks.getFileSize();
  if (largerThanDisk) {
    LOG(ERROR) << "Size on the disk is less than the resized size for "
               << name << " seq-id " << seqId << " disk-size "
               << fileChunks.getFileSize() << " resized-size " << newSize;
    return INVALID_LOG;
  }
  sizeIt->second = newSize;
  return OK;
}
/// Runs the receiver in long running mode: start() followed by finish().
/// Download resumption must be disabled (checked).
/// NOTE(review): the previous comment here mentioned forcing full reporting
/// off in daemon mode, but nothing in this function does that - confirm
/// where it is enforced.
///
/// @return the error from start() if it fails; otherwise ERROR, because
///         finish() is not expected to return in this mode
ErrorCode Receiver::runForever() {
  WDT_CHECK(!options_.enable_download_resumption)
      << "Transfer resumption not supported in long running mode";
  const ErrorCode startStatus = start();
  if (startStatus == OK) {
    finish();
    // Getting past finish() is unexpected in this mode
    return ERROR;
  }
  return startStatus;
}
/// Creates the throttler from the WdtOptions rates and stores it in
/// throttler_. The average rate, peak rate and bucket limit options are
/// multiplied by kMbToB before being handed to Throttler::makeThrottler.
/// Must only be called while throttler_ is still unset. Throttling is
/// treated as disabled when makeThrottler returns null.
void WdtBase::configureThrottler() {
  WDT_CHECK(!throttler_);
  VLOG(1) << "Configuring throttler options";
  const auto& wdtOptions = WdtOptions::get();
  const double avgRate = wdtOptions.avg_mbytes_per_sec * kMbToB;
  const double peakRate = wdtOptions.max_mbytes_per_sec * kMbToB;
  const double bucketLimit = wdtOptions.throttler_bucket_limit * kMbToB;
  throttler_ = Throttler::makeThrottler(avgRate, peakRate, bucketLimit,
                                        wdtOptions.throttler_log_time_millis);
  if (!throttler_) {
    LOG(INFO) << "Throttling not enabled";
    return;
  }
  LOG(INFO) << "Enabling throttling " << *throttler_;
}
/// Validates a newly received checkpoint against lastCheckpoint_.
///
/// @param checkpoint        checkpoint to validate
/// @param globalCheckpoint  whether this is a global checkpoint; for those a
///                          lack of progress is not reported
/// @return OK                  when there is no previous checkpoint, or the
///                             new one is acceptable
///         INVALID_CHECKPOINT  when numBlocks went backwards, or the same
///                             block is reported with different received
///                             bytes
///         NO_PROGRESS         when a non-global checkpoint shows no progress
ErrorCode ThreadTransferHistory::validateCheckpoint(
    const Checkpoint &checkpoint, bool globalCheckpoint) {
  if (lastCheckpoint_ == nullptr) {
    return OK;
  }
  const auto &previous = *lastCheckpoint_;
  if (checkpoint.numBlocks > previous.numBlocks) {
    return OK;
  }
  if (checkpoint.numBlocks < previous.numBlocks) {
    WLOG(ERROR)
        << "Current checkpoint must be higher than previous checkpoint, "
           "Last checkpoint: "
        << *lastCheckpoint_ << ", Current checkpoint: " << checkpoint;
    return INVALID_CHECKPOINT;
  }

  // From here on both checkpoints carry the same numBlocks
  bool noProgress;
  const bool sameBlock =
      checkpoint.lastBlockSeqId == previous.lastBlockSeqId &&
      checkpoint.lastBlockOffset == previous.lastBlockOffset;
  if (sameBlock) {
    if (checkpoint.lastBlockReceivedBytes != previous.lastBlockReceivedBytes) {
      WLOG(ERROR) << "Current checkpoint has different received bytes, but all "
                     "other fields are same, Last checkpoint "
                  << *lastCheckpoint_ << ", Current checkpoint: " << checkpoint;
      return INVALID_CHECKPOINT;
    }
    noProgress = true;
  } else {
    // a different block: progress was made only if some bytes were received
    WDT_CHECK(checkpoint.lastBlockReceivedBytes >= 0);
    noProgress = (checkpoint.lastBlockReceivedBytes == 0);
  }

  // The same global checkpoint can arrive several times, so lack of progress
  // is only reported for non-global checkpoints
  if (noProgress && !globalCheckpoint) {
    WLOG(WARNING) << "No progress since last checkpoint, Last checkpoint: "
                  << *lastCheckpoint_ << ", Current checkpoint: " << checkpoint;
    return NO_PROGRESS;
  }
  return OK;
}
void WdtSocket::writeEncryptionSettingsOnce() { if (!encryptionParams_.isSet() || encryptionSettingsWritten_) { return; } WDT_CHECK(!encryptionParams_.getSecret().empty()); int timeoutMs = threadCtx_.getOptions().write_timeout_millis; std::string iv; if (!encryptor_.start(encryptionParams_, iv)) { writeErrorCode_ = ENCRYPTION_ERROR; return; } int64_t off = 0; buf_[off++] = Protocol::ENCRYPTION_CMD; Protocol::encodeEncryptionSettings(buf_, off, off + Protocol::kMaxEncryption, encryptionParams_.getType(), iv); int written = writeInternal(buf_, off, timeoutMs, false); if (written != off) { LOG(ERROR) << "Failed to write encryption settings " << written << " " << port_; return; } encryptionSettingsWritten_ = true; }
TransferStats SenderThread::sendOneByteSource( const std::unique_ptr<ByteSource> &source, ErrorCode transferStatus) { TransferStats stats; char headerBuf[Protocol::kMaxHeader]; int64_t off = 0; headerBuf[off++] = Protocol::FILE_CMD; headerBuf[off++] = transferStatus; char *headerLenPtr = headerBuf + off; off += sizeof(int16_t); const int64_t expectedSize = source->getSize(); int64_t actualSize = 0; const SourceMetaData &metadata = source->getMetaData(); BlockDetails blockDetails; blockDetails.fileName = metadata.relPath; blockDetails.seqId = metadata.seqId; blockDetails.fileSize = metadata.size; blockDetails.offset = source->getOffset(); blockDetails.dataSize = expectedSize; blockDetails.allocationStatus = metadata.allocationStatus; blockDetails.prevSeqId = metadata.prevSeqId; Protocol::encodeHeader(wdtParent_->getProtocolVersion(), headerBuf, off, Protocol::kMaxHeader, blockDetails); int16_t littleEndianOff = folly::Endian::little((int16_t)off); folly::storeUnaligned<int16_t>(headerLenPtr, littleEndianOff); int64_t written = socket_->write(headerBuf, off); if (written != off) { WTPLOG(ERROR) << "Write error/mismatch " << written << " " << off << ". fd = " << socket_->getFd() << ". file = " << metadata.relPath << ". 
port = " << socket_->getPort(); stats.setLocalErrorCode(SOCKET_WRITE_ERROR); stats.incrFailedAttempts(); return stats; } stats.addHeaderBytes(written); int64_t byteSourceHeaderBytes = written; int64_t throttlerInstanceBytes = byteSourceHeaderBytes; int64_t totalThrottlerBytes = 0; WTVLOG(3) << "Sent " << written << " on " << socket_->getFd() << " : " << folly::humanify(std::string(headerBuf, off)); int32_t checksum = 0; while (!source->finished()) { // TODO: handle protocol errors from readHeartBeats readHeartBeats(); int64_t size; char *buffer = source->read(size); if (source->hasError()) { WTLOG(ERROR) << "Failed reading file " << source->getIdentifier() << " for fd " << socket_->getFd(); break; } WDT_CHECK(buffer && size > 0); if (footerType_ == CHECKSUM_FOOTER) { checksum = folly::crc32c((const uint8_t *)buffer, size, checksum); } if (wdtParent_->getThrottler()) { /** * If throttling is enabled we call limit(deltaBytes) which * used both the methods of throttling peak and average. * Always call it with bytes being written to the wire, throttler * will do the rest. * The first time throttle is called with the header bytes * included. In the next iterations throttler is only called * with the bytes being written. */ throttlerInstanceBytes += size; wdtParent_->getThrottler()->limit(*threadCtx_, throttlerInstanceBytes); totalThrottlerBytes += throttlerInstanceBytes; throttlerInstanceBytes = 0; } written = socket_->write(buffer, size, /* retry writes */ true); if (getThreadAbortCode() != OK) { WTLOG(ERROR) << "Transfer aborted during block transfer " << socket_->getPort() << " " << source->getIdentifier(); stats.setLocalErrorCode(ABORT); stats.incrFailedAttempts(); return stats; } if (written != size) { WTLOG(ERROR) << "Write error " << written << " (" << size << ")" << ". fd = " << socket_->getFd() << ". file = " << metadata.relPath << ". 
port = " << socket_->getPort(); stats.setLocalErrorCode(SOCKET_WRITE_ERROR); stats.incrFailedAttempts(); return stats; } stats.addDataBytes(written); actualSize += written; } if (actualSize != expectedSize) { // Can only happen if sender thread can not read complete source byte // stream WTLOG(ERROR) << "UGH " << source->getIdentifier() << " " << expectedSize << " " << actualSize; struct stat fileStat; if (stat(metadata.fullPath.c_str(), &fileStat) != 0) { WTPLOG(ERROR) << "stat failed on path " << metadata.fullPath; } else { WTLOG(WARNING) << "file " << source->getIdentifier() << " previous size " << metadata.size << " current size " << fileStat.st_size; } stats.setLocalErrorCode(BYTE_SOURCE_READ_ERROR); stats.incrFailedAttempts(); return stats; } if (wdtParent_->getThrottler() && actualSize > 0) { WDT_CHECK(totalThrottlerBytes == actualSize + byteSourceHeaderBytes) << totalThrottlerBytes << " " << (actualSize + totalThrottlerBytes); } if (footerType_ != NO_FOOTER) { off = 0; headerBuf[off++] = Protocol::FOOTER_CMD; Protocol::encodeFooter(headerBuf, off, Protocol::kMaxFooter, checksum); int toWrite = off; written = socket_->write(headerBuf, toWrite); if (written != toWrite) { WTLOG(ERROR) << "Write mismatch " << written << " " << toWrite; stats.setLocalErrorCode(SOCKET_WRITE_ERROR); stats.incrFailedAttempts(); return stats; } stats.addHeaderBytes(toWrite); } stats.setLocalErrorCode(OK); stats.incrNumBlocks(); stats.addEffectiveBytes(stats.getHeaderBytes(), stats.getDataBytes()); return stats; }
/// Drops one reference from refCount_ while holding throttlerMutex_.
/// The count must be positive on entry (checked).
void Throttler::deRegisterTransfer() {
  folly::SpinLockGuard lock(throttlerMutex_);
  WDT_CHECK(refCount_ > 0);
  --refCount_;
}
std::unique_ptr<TransferReport> Sender::finish() { std::unique_lock<std::mutex> instanceLock(instanceManagementMutex_); VLOG(1) << "Sender::finish()"; TransferStatus status = getTransferStatus(); if (status == NOT_STARTED) { LOG(WARNING) << "Even though transfer has not started, finish is called"; // getTransferReport will set the error code to ERROR return getTransferReport(); } if (status == THREADS_JOINED) { VLOG(1) << "Threads have already been joined. Returning the" << " existing transfer report"; return getTransferReport(); } const bool twoPhases = options_.two_phases; bool progressReportEnabled = progressReporter_ && progressReportIntervalMillis_ > 0; for (auto &senderThread : senderThreads_) { senderThread->finish(); } if (!twoPhases) { dirThread_.join(); } WDT_CHECK(numActiveThreads_ == 0); setTransferStatus(THREADS_JOINED); if (progressReportEnabled) { progressReporterThread_.join(); } std::vector<TransferStats> threadStats; for (auto &senderThread : senderThreads_) { threadStats.push_back(senderThread->moveStats()); } bool allSourcesAcked = false; for (auto &senderThread : senderThreads_) { auto &stats = senderThread->getTransferStats(); if (stats.getErrorCode() == OK) { // at least one thread finished correctly // that means all transferred sources are acked allSourcesAcked = true; break; } } std::vector<TransferStats> transferredSourceStats; for (auto port : transferRequest_.ports) { auto &transferHistory = transferHistoryController_->getTransferHistory(port); if (allSourcesAcked) { transferHistory.markAllAcknowledged(); } else { transferHistory.returnUnackedSourcesToQueue(); } if (options_.full_reporting) { std::vector<TransferStats> stats = transferHistory.popAckedSourceStats(); transferredSourceStats.insert(transferredSourceStats.end(), std::make_move_iterator(stats.begin()), std::make_move_iterator(stats.end())); } } if (options_.full_reporting) { validateTransferStats(transferredSourceStats, dirQueue_->getFailedSourceStats()); } int64_t 
totalFileSize = dirQueue_->getTotalSize(); double totalTime = durationSeconds(endTime_ - startTime_); std::unique_ptr<TransferReport> transferReport = folly::make_unique<TransferReport>( transferredSourceStats, dirQueue_->getFailedSourceStats(), threadStats, dirQueue_->getFailedDirectories(), totalTime, totalFileSize, dirQueue_->getCount()); if (progressReportEnabled) { progressReporter_->end(transferReport); } if (options_.enable_perf_stat_collection) { PerfStatReport report(options_); for (auto &senderThread : senderThreads_) { report += senderThread->getPerfReport(); } report += dirQueue_->getPerfReport(); LOG(INFO) << report; } double directoryTime; directoryTime = dirQueue_->getDirectoryTime(); LOG(INFO) << "Total sender time = " << totalTime << " seconds (" << directoryTime << " dirTime)" << ". Transfer summary : " << *transferReport << "\nTotal sender throughput = " << transferReport->getThroughputMBps() << " Mbytes/sec (" << transferReport->getSummary().getEffectiveTotalBytes() / (totalTime - directoryTime) / kMbToB << " Mbytes/sec pure transfer rate)"; return transferReport; }
/// PROCESS_VERSION_MISMATCH state handler. Entered with the thread's local
/// error code set to ABORT. All threads first meet at the version mismatch
/// barrier; the thread that sees the funnel in FUNNEL_START then decides on
/// the protocol version, while the others wait on the funnel and pick up the
/// outcome from the negotiation status.
///
/// @return CONNECT when a common protocol version was agreed on, END
///         otherwise
SenderState SenderThread::processVersionMismatch() {
  WTLOG(INFO) << "entered PROCESS_VERSION_MISMATCH state ";
  WDT_CHECK(threadStats_.getLocalErrorCode() == ABORT);
  auto negotiationStatus = wdtParent_->getNegotiationStatus();
  WDT_CHECK_NE(negotiationStatus, V_MISMATCH_FAILED)
      << "Thread should have ended in case of version mismatch";
  if (negotiationStatus == V_MISMATCH_RESOLVED) {
    WTLOG(WARNING) << "Protocol version already negotiated, but "
                      "transfer still aborted due to version mismatch";
    return END;
  }
  WDT_CHECK_EQ(negotiationStatus, V_MISMATCH_WAIT);
  // Need a barrier here to make sure all the negotiated protocol versions
  // have been collected
  auto barrier = controller_->getBarrier(VERSION_MISMATCH_BARRIER);
  barrier->execute();
  WTVLOG(1) << "cleared the protocol version barrier";
  auto execFunnel = controller_->getFunnel(VERSION_MISMATCH_FUNNEL);
  while (true) {
    auto status = execFunnel->getStatus();
    switch (status) {
      case FUNNEL_START: {
        WTLOG(INFO) << "started the funnel for version mismatch";
        // Start from "failed"; this is only upgraded to "resolved" at the
        // very end, so every early return below leaves the failure in place
        wdtParent_->setProtoNegotiationStatus(V_MISMATCH_FAILED);
        if (transferHistoryController_->handleVersionMismatch() != OK) {
          execFunnel->notifySuccess();
          return END;
        }
        // All threads that negotiated a version (> 0) must agree on it
        int negotiatedProtocol = 0;
        for (int threadProtocolVersion :
             wdtParent_->getNegotiatedProtocols()) {
          if (threadProtocolVersion > 0) {
            if (negotiatedProtocol > 0 &&
                negotiatedProtocol != threadProtocolVersion) {
              WTLOG(ERROR)
                  << "Different threads negotiated different protocols "
                  << negotiatedProtocol << " " << threadProtocolVersion;
              execFunnel->notifySuccess();
              return END;
            }
            negotiatedProtocol = threadProtocolVersion;
          }
        }
        WDT_CHECK_GT(negotiatedProtocol, 0);
        WLOG_IF(INFO, negotiatedProtocol != threadProtocolVersion_)
            << *this << " Changing protocol version to " << negotiatedProtocol
            << ", previous version " << threadProtocolVersion_;
        // Adopt the agreed version, clear the abort and let this thread
        // reconnect
        wdtParent_->setProtocolVersion(negotiatedProtocol);
        threadProtocolVersion_ = wdtParent_->getProtocolVersion();
        setFooterType();
        threadStats_.setRemoteErrorCode(OK);
        wdtParent_->setProtoNegotiationStatus(V_MISMATCH_RESOLVED);
        wdtParent_->clearAbort();
        execFunnel->notifySuccess();
        return CONNECT;
      }
      case FUNNEL_PROGRESS: {
        // Wait on the funnel, then re-read its status on the next iteration
        execFunnel->wait();
        break;
      }
      case FUNNEL_END: {
        // The funnel has ended; follow the recorded negotiation status
        negotiationStatus = wdtParent_->getNegotiationStatus();
        WDT_CHECK_NE(negotiationStatus, V_MISMATCH_WAIT);
        if (negotiationStatus == V_MISMATCH_FAILED) {
          return END;
        }
        if (negotiationStatus == V_MISMATCH_RESOLVED) {
          threadProtocolVersion_ = wdtParent_->getProtocolVersion();
          threadStats_.setRemoteErrorCode(OK);
          return CONNECT;
        }
      }
    }
  }
}