Exemple #1
0
/**
 * Reads the one-byte receiver command into buf_.
 *
 * A plain socket read is tried first. If it fails with WDT_TIMEOUT, the
 * number of unacked bytes reported by the socket is compared against the
 * previously observed value: while it keeps decreasing the read is retried,
 * once it reaches zero a final timed read is issued, and if it stops
 * changing (or cannot be obtained) the function gives up.
 *
 * @return OK when exactly one byte was read, ABORT when the thread abort
 *         code is set after an unsuccessful read, SOCKET_READ_ERROR in all
 *         other failure cases
 */
ErrorCode SenderThread::readNextReceiverCmd() {
  int numUnackedBytes = socket_->getUnackedBytes();
  int sendBufferDrainMs = 0;
  const Clock::time_point waitBegin = Clock::now();
  for (;;) {
    const int numRead = socket_->read(buf_, 1);
    if (numRead == 1) {
      return OK;
    }
    if (getThreadAbortCode() != OK) {
      return ABORT;
    }
    if (numRead == 0) {
      WTPLOG(ERROR) << "Got unexpected EOF, reconnecting";
      return SOCKET_READ_ERROR;
    }
    // 1 and 0 were handled above, so only a negative value may remain
    WDT_CHECK_LT(numRead, 0);
    const ErrorCode readErr = socket_->getReadErrCode();
    WTLOG(ERROR) << "Failed to read receiver cmd " << numRead << " "
                 << errorCodeToStr(readErr);
    if (readErr != WDT_TIMEOUT) {
      // anything other than a timeout is not retried here
      return SOCKET_READ_ERROR;
    }
    const int curUnackedBytes = socket_->getUnackedBytes();
    if (numUnackedBytes < 0 || curUnackedBytes < 0) {
      WTLOG(ERROR) << "Failed to read number of unacked bytes, reconnecting";
      return SOCKET_READ_ERROR;
    }
    // the unacked count is expected to never grow between two samples
    WDT_CHECK_GE(numUnackedBytes, curUnackedBytes);
    if (curUnackedBytes == 0) {
      // send buffer is empty; remember how long that took and leave the loop
      sendBufferDrainMs = durationMillis(Clock::now() - waitBegin);
      break;
    }
    if (curUnackedBytes == numUnackedBytes) {
      WTLOG(ERROR) << "Number of unacked bytes did not change, reconnecting "
                   << curUnackedBytes;
      return SOCKET_READ_ERROR;
    }
    WTLOG(INFO) << "Read receiver command failed, but number of unacked "
                   "bytes decreased, retrying socket read "
                << numUnackedBytes << " " << curUnackedBytes;
    numUnackedBytes = curUnackedBytes;
  }
  // Sender and receiver tcp buffer sizes are assumed to be the same, so the
  // receiver is given the same amount of time again (plus the configured
  // extra) to clear its own buffer.
  const int finalReadTimeoutMs = sendBufferDrainMs + options_.drain_extra_ms;
  WTLOG(INFO) << "Send buffer cleared in " << sendBufferDrainMs
              << "ms, waiting for " << finalReadTimeoutMs
              << "ms for receiver buffer to clear";
  // readWithTimeout internally checks for abort periodically
  const int finalRead = socket_->readWithTimeout(buf_, 1, finalReadTimeoutMs);
  if (finalRead == 1) {
    return OK;
  }
  WTLOG(ERROR) << "Failed to read receiver cmd " << finalRead;
  return SOCKET_READ_ERROR;
}
Exemple #2
0
std::unique_ptr<ClientSocket> SenderThread::connectToReceiver(
    const int port, IAbortChecker const *abortChecker, ErrorCode &errCode) {
  auto startTime = Clock::now();
  int connectAttempts = 0;
  std::unique_ptr<ClientSocket> socket;
  const EncryptionParams &encryptionData =
      wdtParent_->transferRequest_.encryptionData;
  if (!wdtParent_->socketCreator_) {
    // socket creator not set, creating ClientSocket
    socket = folly::make_unique<ClientSocket>(
        *threadCtx_, wdtParent_->destHost_, port, encryptionData);
  } else {
    socket = wdtParent_->socketCreator_->makeSocket(
        *threadCtx_, wdtParent_->destHost_, port, encryptionData);
  }
  double retryInterval = options_.sleep_millis;
  int maxRetries = options_.max_retries;
  if (maxRetries < 1) {
    WTLOG(ERROR) << "Invalid max_retries " << maxRetries << " using 1 instead";
    maxRetries = 1;
  }
  for (int i = 1; i <= maxRetries; ++i) {
    ++connectAttempts;
    errCode = socket->connect();
    if (errCode == OK) {
      break;
    } else if (errCode == CONN_ERROR) {
      return nullptr;
    }
    if (getThreadAbortCode() != OK) {
      errCode = ABORT;
      return nullptr;
    }
    if (i != maxRetries) {
      // sleep between attempts but not after the last
      WVTLOG(1) << "Sleeping after failed attempt " << i;
      /* sleep override */ usleep(retryInterval * 1000);
    }
  }
  double elapsedSecsConn = durationSeconds(Clock::now() - startTime);
  if (errCode != OK) {
    WTLOG(ERROR) << "Unable to connect to " << wdtParent_->destHost_ << " "
                 << port << " despite " << connectAttempts << " retries in "
                 << elapsedSecsConn << " seconds.";
    errCode = CONN_ERROR;
    return nullptr;
  }
  ((connectAttempts > 1) ? WTLOG(WARNING) : WTLOG(INFO))
      << "Connection took " << connectAttempts << " attempt(s) and "
      << elapsedSecsConn << " seconds. port " << port;
  return socket;
}
Exemple #3
0
void SenderThread::start() {
  Clock::time_point startTime = Clock::now();

  if (buf_ == nullptr) {
    WTLOG(ERROR) << "Unable to allocate buffer";
    threadStats_.setLocalErrorCode(MEMORY_ALLOCATION_ERROR);
    return;
  }

  setFooterType();

  controller_->executeAtStart([&]() { wdtParent_->startNewTransfer(); });
  SenderState state = CONNECT;

  while (state != END) {
    ErrorCode abortCode = getThreadAbortCode();
    if (abortCode != OK) {
      WTLOG(ERROR) << "Transfer aborted " << errorCodeToStr(abortCode);
      threadStats_.setLocalErrorCode(ABORT);
      if (abortCode == VERSION_MISMATCH) {
        state = PROCESS_VERSION_MISMATCH;
      } else {
        break;
      }
    }
    state = (this->*stateMap_[state])();
  }

  EncryptionType encryptionType =
      (socket_ ? socket_->getEncryptionType() : ENC_NONE);
  threadStats_.setEncryptionType(encryptionType);
  double totalTime = durationSeconds(Clock::now() - startTime);
  WTLOG(INFO) << "Port " << port_ << " done. " << threadStats_
              << " Total throughput = "
              << threadStats_.getEffectiveTotalBytes() / totalTime / kMbToB
              << " Mbytes/sec";

  ThreadTransferHistory &transferHistory = getTransferHistory();
  transferHistory.markNotInUse();
  controller_->deRegisterThread(threadIndex_);
  controller_->executeAtEnd([&]() { wdtParent_->endCurTransfer(); });
  // Important to delete the socket before the thread dies for sub class
  // of clientsocket which have thread local data
  socket_ = nullptr;

  return;
}
Exemple #4
0
/**
 * CONNECT state handler: closes any existing socket and establishes a new
 * connection to the receiver on port_.
 *
 * @return END when the existing socket has a non-retryable error, when the
 *         thread reconnected options_.max_transfer_retries times without
 *         progress, or when connecting fails;
 *         PROCESS_VERSION_MISMATCH when connecting was aborted and the
 *         thread abort code is VERSION_MISMATCH;
 *         READ_LOCAL_CHECKPOINT when the thread already carries a non-OK
 *         local error code; SEND_SETTINGS otherwise.
 */
SenderState SenderThread::connect() {
  WTVLOG(1) << "entered CONNECT state";
  if (socket_) {
    ErrorCode socketErrCode = socket_->getNonRetryableErrCode();
    if (socketErrCode != OK) {
      WTLOG(ERROR) << "Socket has non-retryable error "
                   << errorCodeToStr(socketErrCode);
      threadStats_.setLocalErrorCode(socketErrCode);
      return END;
    }
    socket_->closeNoCheck();
  }
  if (numReconnectWithoutProgress_ >= options_.max_transfer_retries) {
    // Log port_ instead of socket_->getPort(): socket_ is not guaranteed to
    // be set here (it is only null-checked above), so dereferencing it on
    // this path could crash.
    WTLOG(ERROR) << "Sender thread reconnected " << numReconnectWithoutProgress_
                 << " times without making any progress, giving up. port: "
                 << port_;
    threadStats_.setLocalErrorCode(NO_PROGRESS);
    return END;
  }
  ErrorCode code;
  // TODO cleanup more but for now avoid having 2 socket object live per port
  socket_ = nullptr;
  socket_ = connectToReceiver(port_, threadCtx_->getAbortChecker(), code);
  if (code == ABORT) {
    threadStats_.setLocalErrorCode(ABORT);
    if (getThreadAbortCode() == VERSION_MISMATCH) {
      return PROCESS_VERSION_MISMATCH;
    }
    return END;
  }
  if (code != OK) {
    threadStats_.setLocalErrorCode(code);
    return END;
  }
  // The next state must be chosen before reset() is called, because it
  // depends on the error code left over from before this reconnect.
  auto nextState = SEND_SETTINGS;
  if (threadStats_.getLocalErrorCode() != OK) {
    nextState = READ_LOCAL_CHECKPOINT;
  }
  // resetting the status of thread
  reset();
  return nextState;
}
Exemple #5
0
/**
 * Sends one byte source over socket_: a FILE_CMD header describing the
 * block, the data read from the source and, unless footerType_ is
 * NO_FOOTER, a FOOTER_CMD footer carrying the checksum.
 *
 * @param source          byte source to read from and send
 * @param transferStatus  status value written right after FILE_CMD in the
 *                        header
 * @return stats of this attempt. The local error code is OK on success,
 *         SOCKET_WRITE_ERROR when a header/data/footer write is short,
 *         ABORT when the thread abort code is set after a data write, and
 *         BYTE_SOURCE_READ_ERROR when the number of data bytes sent differs
 *         from source->getSize().
 */
TransferStats SenderThread::sendOneByteSource(
    const std::unique_ptr<ByteSource> &source, ErrorCode transferStatus) {
  TransferStats stats;
  char headerBuf[Protocol::kMaxHeader];
  int64_t off = 0;
  headerBuf[off++] = Protocol::FILE_CMD;
  headerBuf[off++] = transferStatus;
  // Reserve room for the 16-bit header length; it is filled in below once
  // the full header has been encoded and its length is known.
  char *headerLenPtr = headerBuf + off;
  off += sizeof(int16_t);
  const int64_t expectedSize = source->getSize();
  int64_t actualSize = 0;
  const SourceMetaData &metadata = source->getMetaData();
  BlockDetails blockDetails;
  blockDetails.fileName = metadata.relPath;
  blockDetails.seqId = metadata.seqId;
  blockDetails.fileSize = metadata.size;
  blockDetails.offset = source->getOffset();
  blockDetails.dataSize = expectedSize;
  blockDetails.allocationStatus = metadata.allocationStatus;
  blockDetails.prevSeqId = metadata.prevSeqId;
  Protocol::encodeHeader(wdtParent_->getProtocolVersion(), headerBuf, off,
                         Protocol::kMaxHeader, blockDetails);
  // Header length is stored little-endian at the reserved position.
  int16_t littleEndianOff = folly::Endian::little((int16_t)off);
  folly::storeUnaligned<int16_t>(headerLenPtr, littleEndianOff);
  int64_t written = socket_->write(headerBuf, off);
  if (written != off) {
    WTPLOG(ERROR) << "Write error/mismatch " << written << " " << off
                  << ". fd = " << socket_->getFd()
                  << ". file = " << metadata.relPath
                  << ". port = " << socket_->getPort();
    stats.setLocalErrorCode(SOCKET_WRITE_ERROR);
    stats.incrFailedAttempts();
    return stats;
  }

  stats.addHeaderBytes(written);
  int64_t byteSourceHeaderBytes = written;
  // Bytes to report to the throttler on the next limit() call; starts with
  // the header bytes so that they are accounted for with the first chunk.
  int64_t throttlerInstanceBytes = byteSourceHeaderBytes;
  int64_t totalThrottlerBytes = 0;
  WTVLOG(3) << "Sent " << written << " on " << socket_->getFd() << " : "
            << folly::humanify(std::string(headerBuf, off));
  int32_t checksum = 0;
  while (!source->finished()) {
    // TODO: handle protocol errors from readHeartBeats
    readHeartBeats();

    int64_t size = 0;
    char *buffer = source->read(size);
    if (source->hasError()) {
      // Leaves actualSize short of expectedSize; reported as
      // BYTE_SOURCE_READ_ERROR after the loop.
      WTLOG(ERROR) << "Failed reading file " << source->getIdentifier()
                   << " for fd " << socket_->getFd();
      break;
    }
    WDT_CHECK(buffer && size > 0);
    if (footerType_ == CHECKSUM_FOOTER) {
      // running checksum over all data chunks of this source
      checksum = folly::crc32c((const uint8_t *)buffer, size, checksum);
    }
    if (wdtParent_->getThrottler()) {
      /**
       * If throttling is enabled we call limit(deltaBytes) which
       * used both the methods of throttling peak and average.
       * Always call it with bytes being written to the wire, throttler
       * will do the rest.
       * The first time throttle is called with the header bytes
       * included. In the next iterations throttler is only called
       * with the bytes being written.
       */
      throttlerInstanceBytes += size;
      wdtParent_->getThrottler()->limit(*threadCtx_, throttlerInstanceBytes);
      totalThrottlerBytes += throttlerInstanceBytes;
      throttlerInstanceBytes = 0;
    }
    written = socket_->write(buffer, size, /* retry writes */ true);
    // Abort is checked before the short-write check so that an aborted
    // transfer is reported as ABORT rather than as a write error.
    if (getThreadAbortCode() != OK) {
      WTLOG(ERROR) << "Transfer aborted during block transfer "
                   << socket_->getPort() << " " << source->getIdentifier();
      stats.setLocalErrorCode(ABORT);
      stats.incrFailedAttempts();
      return stats;
    }
    if (written != size) {
      WTLOG(ERROR) << "Write error " << written << " (" << size << ")"
                   << ". fd = " << socket_->getFd()
                   << ". file = " << metadata.relPath
                   << ". port = " << socket_->getPort();
      stats.setLocalErrorCode(SOCKET_WRITE_ERROR);
      stats.incrFailedAttempts();
      return stats;
    }
    stats.addDataBytes(written);
    actualSize += written;
  }
  if (actualSize != expectedSize) {
    // Can only happen if sender thread can not read complete source byte
    // stream
    WTLOG(ERROR) << "UGH " << source->getIdentifier() << " " << expectedSize
                 << " " << actualSize;
    // stat the file to log its current size next to the recorded one
    struct stat fileStat;
    if (stat(metadata.fullPath.c_str(), &fileStat) != 0) {
      WTPLOG(ERROR) << "stat failed on path " << metadata.fullPath;
    } else {
      WTLOG(WARNING) << "file " << source->getIdentifier() << " previous size "
                     << metadata.size << " current size " << fileStat.st_size;
    }
    stats.setLocalErrorCode(BYTE_SOURCE_READ_ERROR);
    stats.incrFailedAttempts();
    return stats;
  }
  if (wdtParent_->getThrottler() && actualSize > 0) {
    // Everything put on the wire (header + data) must have been reported to
    // the throttler. The failure message prints both sides of the
    // comparison.
    WDT_CHECK(totalThrottlerBytes == actualSize + byteSourceHeaderBytes)
        << totalThrottlerBytes << " " << (actualSize + byteSourceHeaderBytes);
  }
  if (footerType_ != NO_FOOTER) {
    // headerBuf is reused to build the footer
    off = 0;
    headerBuf[off++] = Protocol::FOOTER_CMD;
    Protocol::encodeFooter(headerBuf, off, Protocol::kMaxFooter, checksum);
    int toWrite = off;
    written = socket_->write(headerBuf, toWrite);
    if (written != toWrite) {
      WTLOG(ERROR) << "Write mismatch " << written << " " << toWrite;
      stats.setLocalErrorCode(SOCKET_WRITE_ERROR);
      stats.incrFailedAttempts();
      return stats;
    }
    stats.addHeaderBytes(toWrite);
  }
  stats.setLocalErrorCode(OK);
  stats.incrNumBlocks();
  stats.addEffectiveBytes(stats.getHeaderBytes(), stats.getDataBytes());
  return stats;
}