/**
 * Reads the next one-byte receiver command from socket_ into buf_.
 *
 * A read that fails with WDT_TIMEOUT is retried for as long as the number of
 * unacked bytes reported by the socket keeps decreasing. Once that number
 * reaches zero, a single final read is issued whose timeout is the time it
 * took the send buffer to clear plus options_.drain_extra_ms.
 *
 * @return OK when one byte was read; ABORT when the thread abort code is set
 *         after a failed read; SOCKET_READ_ERROR in every other failure case
 */
ErrorCode SenderThread::readNextReceiverCmd() {
  int numUnackedBytes = socket_->getUnackedBytes();
  int sendBufferDrainMillis = 0;
  const Clock::time_point waitBegin = Clock::now();
  for (;;) {
    const int numRead = socket_->read(buf_, 1);
    if (numRead == 1) {
      return OK;
    }
    if (getThreadAbortCode() != OK) {
      return ABORT;
    }
    if (numRead == 0) {
      WTPLOG(ERROR) << "Got unexpected EOF, reconnecting";
      return SOCKET_READ_ERROR;
    }
    // only the error case is left
    WDT_CHECK_LT(numRead, 0);
    const ErrorCode readError = socket_->getReadErrCode();
    WTLOG(ERROR) << "Failed to read receiver cmd " << numRead << " "
                 << errorCodeToStr(readError);
    if (readError != WDT_TIMEOUT) {
      // anything other than a timeout is not retried here
      return SOCKET_READ_ERROR;
    }

    // timed out: use the unacked byte count to decide whether to keep waiting
    const int curUnackedBytes = socket_->getUnackedBytes();
    if (numUnackedBytes < 0 || curUnackedBytes < 0) {
      WTLOG(ERROR) << "Failed to read number of unacked bytes, reconnecting";
      return SOCKET_READ_ERROR;
    }
    WDT_CHECK_GE(numUnackedBytes, curUnackedBytes);
    if (curUnackedBytes == 0) {
      sendBufferDrainMillis = durationMillis(Clock::now() - waitBegin);
      break;
    }
    if (curUnackedBytes == numUnackedBytes) {
      WTLOG(ERROR) << "Number of unacked bytes did not change, reconnecting "
                   << curUnackedBytes;
      return SOCKET_READ_ERROR;
    }
    WTLOG(INFO) << "Read receiver command failed, but number of unacked "
                   "bytes decreased, retrying socket read "
                << numUnackedBytes << " " << curUnackedBytes;
    numUnackedBytes = curUnackedBytes;
  }

  // The sender and receiver tcp buffer sizes are assumed to be the same, so
  // the receiver is given about as long again to clear its own buffer.
  const int finalReadTimeout = sendBufferDrainMillis + options_.drain_extra_ms;
  WTLOG(INFO) << "Send buffer cleared in " << sendBufferDrainMillis
              << "ms, waiting for " << finalReadTimeout
              << "ms for receiver buffer to clear";
  // readWithTimeout internally checks for abort periodically
  const int finalRead = socket_->readWithTimeout(buf_, 1, finalReadTimeout);
  if (finalRead == 1) {
    return OK;
  }
  WTLOG(ERROR) << "Failed to read receiver cmd " << finalRead;
  return SOCKET_READ_ERROR;
}
std::unique_ptr<ClientSocket> SenderThread::connectToReceiver( const int port, IAbortChecker const *abortChecker, ErrorCode &errCode) { auto startTime = Clock::now(); int connectAttempts = 0; std::unique_ptr<ClientSocket> socket; const EncryptionParams &encryptionData = wdtParent_->transferRequest_.encryptionData; if (!wdtParent_->socketCreator_) { // socket creator not set, creating ClientSocket socket = folly::make_unique<ClientSocket>( *threadCtx_, wdtParent_->destHost_, port, encryptionData); } else { socket = wdtParent_->socketCreator_->makeSocket( *threadCtx_, wdtParent_->destHost_, port, encryptionData); } double retryInterval = options_.sleep_millis; int maxRetries = options_.max_retries; if (maxRetries < 1) { WTLOG(ERROR) << "Invalid max_retries " << maxRetries << " using 1 instead"; maxRetries = 1; } for (int i = 1; i <= maxRetries; ++i) { ++connectAttempts; errCode = socket->connect(); if (errCode == OK) { break; } else if (errCode == CONN_ERROR) { return nullptr; } if (getThreadAbortCode() != OK) { errCode = ABORT; return nullptr; } if (i != maxRetries) { // sleep between attempts but not after the last WVTLOG(1) << "Sleeping after failed attempt " << i; /* sleep override */ usleep(retryInterval * 1000); } } double elapsedSecsConn = durationSeconds(Clock::now() - startTime); if (errCode != OK) { WTLOG(ERROR) << "Unable to connect to " << wdtParent_->destHost_ << " " << port << " despite " << connectAttempts << " retries in " << elapsedSecsConn << " seconds."; errCode = CONN_ERROR; return nullptr; } ((connectAttempts > 1) ? WTLOG(WARNING) : WTLOG(INFO)) << "Connection took " << connectAttempts << " attempt(s) and " << elapsedSecsConn << " seconds. port " << port; return socket; }
void SenderThread::start() { Clock::time_point startTime = Clock::now(); if (buf_ == nullptr) { WTLOG(ERROR) << "Unable to allocate buffer"; threadStats_.setLocalErrorCode(MEMORY_ALLOCATION_ERROR); return; } setFooterType(); controller_->executeAtStart([&]() { wdtParent_->startNewTransfer(); }); SenderState state = CONNECT; while (state != END) { ErrorCode abortCode = getThreadAbortCode(); if (abortCode != OK) { WTLOG(ERROR) << "Transfer aborted " << errorCodeToStr(abortCode); threadStats_.setLocalErrorCode(ABORT); if (abortCode == VERSION_MISMATCH) { state = PROCESS_VERSION_MISMATCH; } else { break; } } state = (this->*stateMap_[state])(); } EncryptionType encryptionType = (socket_ ? socket_->getEncryptionType() : ENC_NONE); threadStats_.setEncryptionType(encryptionType); double totalTime = durationSeconds(Clock::now() - startTime); WTLOG(INFO) << "Port " << port_ << " done. " << threadStats_ << " Total throughput = " << threadStats_.getEffectiveTotalBytes() / totalTime / kMbToB << " Mbytes/sec"; ThreadTransferHistory &transferHistory = getTransferHistory(); transferHistory.markNotInUse(); controller_->deRegisterThread(threadIndex_); controller_->executeAtEnd([&]() { wdtParent_->endCurTransfer(); }); // Important to delete the socket before the thread dies for sub class // of clientsocket which have thread local data socket_ = nullptr; return; }
/**
 * CONNECT state handler. Closes any existing socket, gives up if the thread
 * has reconnected too many times without progress, then opens a fresh
 * connection to the receiver.
 *
 * @return END on a non-retryable socket error, on exceeding
 *         options_.max_transfer_retries, or on a connect failure;
 *         PROCESS_VERSION_MISMATCH if the connect was aborted because of a
 *         version mismatch; READ_LOCAL_CHECKPOINT if threadStats_ already
 *         holds a non-OK local error code; SEND_SETTINGS otherwise
 */
SenderState SenderThread::connect() {
  WTVLOG(1) << "entered CONNECT state";
  if (socket_) {
    ErrorCode socketErrCode = socket_->getNonRetryableErrCode();
    if (socketErrCode != OK) {
      WTLOG(ERROR) << "Socket has non-retryable error "
                   << errorCodeToStr(socketErrCode);
      threadStats_.setLocalErrorCode(socketErrCode);
      return END;
    }
    socket_->closeNoCheck();
  }
  if (numReconnectWithoutProgress_ >= options_.max_transfer_retries) {
    // socket_ may be null here (e.g. on the very first connect), so log the
    // thread's own port_ instead of dereferencing the socket
    WTLOG(ERROR) << "Sender thread reconnected " << numReconnectWithoutProgress_
                 << " times without making any progress, giving up. port: "
                 << port_;
    threadStats_.setLocalErrorCode(NO_PROGRESS);
    return END;
  }
  // pessimistic default; connectToReceiver sets the real outcome
  ErrorCode code = CONN_ERROR;
  // TODO cleanup more but for now avoid having 2 socket object live per port
  socket_ = nullptr;
  socket_ = connectToReceiver(port_, threadCtx_->getAbortChecker(), code);
  if (code == ABORT) {
    threadStats_.setLocalErrorCode(ABORT);
    if (getThreadAbortCode() == VERSION_MISMATCH) {
      return PROCESS_VERSION_MISMATCH;
    }
    return END;
  }
  if (code != OK) {
    threadStats_.setLocalErrorCode(code);
    return END;
  }
  // a previously recorded error means this is a reconnect
  auto nextState = SEND_SETTINGS;
  if (threadStats_.getLocalErrorCode() != OK) {
    nextState = READ_LOCAL_CHECKPOINT;
  }
  // resetting the status of thread
  reset();
  return nextState;
}
TransferStats SenderThread::sendOneByteSource( const std::unique_ptr<ByteSource> &source, ErrorCode transferStatus) { TransferStats stats; char headerBuf[Protocol::kMaxHeader]; int64_t off = 0; headerBuf[off++] = Protocol::FILE_CMD; headerBuf[off++] = transferStatus; char *headerLenPtr = headerBuf + off; off += sizeof(int16_t); const int64_t expectedSize = source->getSize(); int64_t actualSize = 0; const SourceMetaData &metadata = source->getMetaData(); BlockDetails blockDetails; blockDetails.fileName = metadata.relPath; blockDetails.seqId = metadata.seqId; blockDetails.fileSize = metadata.size; blockDetails.offset = source->getOffset(); blockDetails.dataSize = expectedSize; blockDetails.allocationStatus = metadata.allocationStatus; blockDetails.prevSeqId = metadata.prevSeqId; Protocol::encodeHeader(wdtParent_->getProtocolVersion(), headerBuf, off, Protocol::kMaxHeader, blockDetails); int16_t littleEndianOff = folly::Endian::little((int16_t)off); folly::storeUnaligned<int16_t>(headerLenPtr, littleEndianOff); int64_t written = socket_->write(headerBuf, off); if (written != off) { WTPLOG(ERROR) << "Write error/mismatch " << written << " " << off << ". fd = " << socket_->getFd() << ". file = " << metadata.relPath << ". 
port = " << socket_->getPort(); stats.setLocalErrorCode(SOCKET_WRITE_ERROR); stats.incrFailedAttempts(); return stats; } stats.addHeaderBytes(written); int64_t byteSourceHeaderBytes = written; int64_t throttlerInstanceBytes = byteSourceHeaderBytes; int64_t totalThrottlerBytes = 0; WTVLOG(3) << "Sent " << written << " on " << socket_->getFd() << " : " << folly::humanify(std::string(headerBuf, off)); int32_t checksum = 0; while (!source->finished()) { // TODO: handle protocol errors from readHeartBeats readHeartBeats(); int64_t size; char *buffer = source->read(size); if (source->hasError()) { WTLOG(ERROR) << "Failed reading file " << source->getIdentifier() << " for fd " << socket_->getFd(); break; } WDT_CHECK(buffer && size > 0); if (footerType_ == CHECKSUM_FOOTER) { checksum = folly::crc32c((const uint8_t *)buffer, size, checksum); } if (wdtParent_->getThrottler()) { /** * If throttling is enabled we call limit(deltaBytes) which * used both the methods of throttling peak and average. * Always call it with bytes being written to the wire, throttler * will do the rest. * The first time throttle is called with the header bytes * included. In the next iterations throttler is only called * with the bytes being written. */ throttlerInstanceBytes += size; wdtParent_->getThrottler()->limit(*threadCtx_, throttlerInstanceBytes); totalThrottlerBytes += throttlerInstanceBytes; throttlerInstanceBytes = 0; } written = socket_->write(buffer, size, /* retry writes */ true); if (getThreadAbortCode() != OK) { WTLOG(ERROR) << "Transfer aborted during block transfer " << socket_->getPort() << " " << source->getIdentifier(); stats.setLocalErrorCode(ABORT); stats.incrFailedAttempts(); return stats; } if (written != size) { WTLOG(ERROR) << "Write error " << written << " (" << size << ")" << ". fd = " << socket_->getFd() << ". file = " << metadata.relPath << ". 
port = " << socket_->getPort(); stats.setLocalErrorCode(SOCKET_WRITE_ERROR); stats.incrFailedAttempts(); return stats; } stats.addDataBytes(written); actualSize += written; } if (actualSize != expectedSize) { // Can only happen if sender thread can not read complete source byte // stream WTLOG(ERROR) << "UGH " << source->getIdentifier() << " " << expectedSize << " " << actualSize; struct stat fileStat; if (stat(metadata.fullPath.c_str(), &fileStat) != 0) { WTPLOG(ERROR) << "stat failed on path " << metadata.fullPath; } else { WTLOG(WARNING) << "file " << source->getIdentifier() << " previous size " << metadata.size << " current size " << fileStat.st_size; } stats.setLocalErrorCode(BYTE_SOURCE_READ_ERROR); stats.incrFailedAttempts(); return stats; } if (wdtParent_->getThrottler() && actualSize > 0) { WDT_CHECK(totalThrottlerBytes == actualSize + byteSourceHeaderBytes) << totalThrottlerBytes << " " << (actualSize + totalThrottlerBytes); } if (footerType_ != NO_FOOTER) { off = 0; headerBuf[off++] = Protocol::FOOTER_CMD; Protocol::encodeFooter(headerBuf, off, Protocol::kMaxFooter, checksum); int toWrite = off; written = socket_->write(headerBuf, toWrite); if (written != toWrite) { WTLOG(ERROR) << "Write mismatch " << written << " " << toWrite; stats.setLocalErrorCode(SOCKET_WRITE_ERROR); stats.incrFailedAttempts(); return stats; } stats.addHeaderBytes(toWrite); } stats.setLocalErrorCode(OK); stats.incrNumBlocks(); stats.addEffectiveBytes(stats.getHeaderBytes(), stats.getDataBytes()); return stats; }