Ejemplo n.º 1
0
std::ostream& operator<<(std::ostream& os, const TransferStats& stats) {
  folly::RWSpinLock::ReadHolder lock(stats.mutex_.get());
  double headerOverhead = 100;
  double failureOverhead = 100;

  if (stats.effectiveDataBytes_ > 0) {
    headerOverhead = 100.0 * stats.headerBytes_ / stats.effectiveDataBytes_;
    failureOverhead = 100.0 * (stats.dataBytes_ - stats.effectiveDataBytes_) /
                      stats.effectiveDataBytes_;
  }

  if (stats.errCode_ == OK && stats.remoteErrCode_ == OK) {
    os << "Transfer status = OK.";
  } else {
    os << "Transfer status (local) = " << errorCodeToStr(stats.errCode_)
       << ", (remote) = " << errorCodeToStr(stats.remoteErrCode_) << ".";
  }

  if (stats.numFiles_ > 0) {
    os << " Number of files transferred = " << stats.numFiles_ << ".";
  } else {
    os << " Number of blocks transferred = " << stats.numBlocks_ << ".";
  }
  os << " Data Mbytes = " << stats.effectiveDataBytes_ / kMbToB
     << ". Header kBytes = " << stats.headerBytes_ / 1024. << " ("
     << headerOverhead << "% overhead)"
     << ". Total bytes = " << (stats.dataBytes_ + stats.headerBytes_)
     << ". Wasted bytes due to failure = "
     << (stats.dataBytes_ - stats.effectiveDataBytes_) << " ("
     << failureOverhead << "% overhead).";
  return os;
}
Ejemplo n.º 2
0
TransferReport::TransferReport(
    std::vector<TransferStats>& transferredSourceStats,
    std::vector<TransferStats>& failedSourceStats,
    std::vector<TransferStats>& threadStats,
    std::vector<std::string>& failedDirectories, double totalTime,
    int64_t totalFileSize, int64_t numDiscoveredFiles)
    : transferredSourceStats_(std::move(transferredSourceStats)),
      failedSourceStats_(std::move(failedSourceStats)),
      threadStats_(std::move(threadStats)),
      failedDirectories_(std::move(failedDirectories)),
      totalTime_(totalTime),
      totalFileSize_(totalFileSize) {
  for (const auto& stats : threadStats_) {
    summary_ += stats;
  }
  ErrorCode summaryErrorCode = summary_.getErrorCode();
  bool atLeastOneOk = false;
  for (auto& stats : threadStats_) {
    if (stats.getErrorCode() == OK) {
      atLeastOneOk = true;
      break;
    }
  }
  LOG(INFO) << "Error code summary " << errorCodeToStr(summaryErrorCode);
  // none of the files or directories failed
  bool possiblyOk = true;
  if (!failedDirectories_.empty()) {
    possiblyOk = false;
    summaryErrorCode =
        getMoreInterestingError(summaryErrorCode, BYTE_SOURCE_READ_ERROR);
  }
  for (const auto& sourceStat : failedSourceStats_) {
    possiblyOk = false;
    summaryErrorCode =
        getMoreInterestingError(summaryErrorCode, sourceStat.getErrorCode());
  }
  if (possiblyOk && atLeastOneOk) {
    if (summaryErrorCode != OK) {
      LOG(WARNING) << "WDT successfully recovered from error "
                   << errorCodeToStr(summaryErrorCode);
    }
    summaryErrorCode = OK;
  }
  setErrorCode(summaryErrorCode);

  if (summary_.getEffectiveDataBytes() != totalFileSize_) {
    // sender did not send all the bytes
    LOG(INFO) << "Could not send all the bytes " << totalFileSize_ << " "
              << summary_.getEffectiveDataBytes();
    WDT_CHECK(summaryErrorCode != OK)
        << "BUG: All threads OK yet sized based error detected";
  }
  std::set<std::string> failedFilesSet;
  for (auto& stats : failedSourceStats_) {
    failedFilesSet.insert(stats.getId());
  }
  int64_t numTransferredFiles = numDiscoveredFiles - failedFilesSet.size();
  summary_.setNumFiles(numTransferredFiles);
}
Ejemplo n.º 3
0
ErrorCode WdtSocket::closeConnectionInternal(bool doTagIOs) {
  VLOG(1) << "Closing socket " << port_ << " " << fd_;
  if (fd_ < 0) {
    return OK;
  }
  ErrorCode errorCode = getNonRetryableErrCode();
  if (!writesFinalized_) {
    errorCode = getMoreInterestingError(errorCode, finalizeWrites(doTagIOs));
  }
  if (!readsFinalized_) {
    errorCode = getMoreInterestingError(errorCode, finalizeReads(doTagIOs));
  }
  if (::close(fd_) != 0) {
    PLOG(ERROR) << "Failed to close socket " << fd_ << " " << port_;
    errorCode = getMoreInterestingError(ERROR, errorCode);
  }
  // This looks like a reset() make it explicit (and check it's complete)
  fd_ = -1;
  readErrorCode_ = OK;
  writeErrorCode_ = OK;
  encryptionSettingsRead_ = false;
  encryptionSettingsWritten_ = false;
  writesFinalized_ = false;
  readsFinalized_ = false;
  VLOG(1) << "Error code from close " << errorCodeToStr(errorCode);
  return errorCode;
}
Ejemplo n.º 4
0
TransferReport::TransferReport(TransferStats&& globalStats)
    : summary_(std::move(globalStats)) {
  const int64_t numBlocksSend = summary_.getNumBlocksSend();
  const int64_t numBlocksReceived = summary_.getNumBlocks();
  const int64_t numBytesSend = summary_.getTotalSenderBytes();
  const int64_t numBytesReceived = summary_.getEffectiveDataBytes();
  ErrorCode summaryErrorCode = summary_.getLocalErrorCode();
  VLOG(1) << "Summary Error Code " << summaryErrorCode;
  // TODO this is messy and error (ah!) prone
  if (numBlocksSend == -1 || numBlocksSend != numBlocksReceived) {
    LOG(ERROR) << "Did not receive all the blocks sent by the sender "
               << numBlocksSend << " " << numBlocksReceived;
    summaryErrorCode = getMoreInterestingError(summaryErrorCode, ERROR);
  } else if (numBytesSend != -1 && numBytesSend != numBytesReceived) {
    // did not receive all the bytes
    LOG(ERROR) << "Number of bytes sent and received do not match "
               << numBytesSend << " " << numBytesReceived;
    summaryErrorCode = getMoreInterestingError(summaryErrorCode, ERROR);
  } else {
    // We got all the bytes... but if we have an encryption error we should
    // make it stick (unlike the connection error on a single port...)
    if (summaryErrorCode != OK && summaryErrorCode != ENCRYPTION_ERROR) {
      LOG(WARNING) << "All bytes received, turning "
                   << errorCodeToStr(summaryErrorCode) << " into local OK";
      summaryErrorCode = OK;
    }
  }
  // only the local error code is set here. Any remote error means transfer
  // failure. Since, getErrorCode checks both local and remote codes, it will
  // return the correct one
  summary_.setLocalErrorCode(summaryErrorCode);
}
Ejemplo n.º 5
0
ErrorCode SenderThread::readNextReceiverCmd() {
  int numUnackedBytes = socket_->getUnackedBytes();
  int timeToClearSendBuffer = 0;
  Clock::time_point startTime = Clock::now();
  while (true) {
    int numRead = socket_->read(buf_, 1);
    if (numRead == 1) {
      return OK;
    }
    if (getThreadAbortCode() != OK) {
      return ABORT;
    }
    if (numRead == 0) {
      WTPLOG(ERROR) << "Got unexpected EOF, reconnecting";
      return SOCKET_READ_ERROR;
    }
    WDT_CHECK_LT(numRead, 0);
    ErrorCode errCode = socket_->getReadErrCode();
    WTLOG(ERROR) << "Failed to read receiver cmd " << numRead << " "
                 << errorCodeToStr(errCode);
    if (errCode != WDT_TIMEOUT) {
      // not timed out
      return SOCKET_READ_ERROR;
    }
    int curUnackedBytes = socket_->getUnackedBytes();
    if (numUnackedBytes < 0 || curUnackedBytes < 0) {
      WTLOG(ERROR) << "Failed to read number of unacked bytes, reconnecting";
      return SOCKET_READ_ERROR;
    }
    WDT_CHECK_GE(numUnackedBytes, curUnackedBytes);
    if (curUnackedBytes == 0) {
      timeToClearSendBuffer = durationMillis(Clock::now() - startTime);
      break;
    }
    if (curUnackedBytes == numUnackedBytes) {
      WTLOG(ERROR) << "Number of unacked bytes did not change, reconnecting "
                   << curUnackedBytes;
      return SOCKET_READ_ERROR;
    }
    WTLOG(INFO) << "Read receiver command failed, but number of unacked "
                   "bytes decreased, retrying socket read "
                << numUnackedBytes << " " << curUnackedBytes;
    numUnackedBytes = curUnackedBytes;
  }
  // we are assuming that sender and receiver tcp buffer sizes are same. So, we
  // expect another timeToClearSendBuffer milliseconds for receiver to clear its
  // buffer
  int readTimeout = timeToClearSendBuffer + options_.drain_extra_ms;
  WTLOG(INFO) << "Send buffer cleared in " << timeToClearSendBuffer
              << "ms, waiting for " << readTimeout
              << "ms for receiver buffer to clear";
  // readWithTimeout internally checks for abort periodically
  int numRead = socket_->readWithTimeout(buf_, 1, readTimeout);
  if (numRead != 1) {
    WTLOG(ERROR) << "Failed to read receiver cmd " << numRead;
    return SOCKET_READ_ERROR;
  }
  return OK;
}
Ejemplo n.º 6
0
ErrorCode Wdt::wdtSend(const std::string &wdtNamespace,
                       const WdtTransferRequest &req,
                       std::shared_ptr<IAbortChecker> abortChecker,
                       bool terminateExistingOne) {
  if (!settingsApplied_) {
    applySettings();
  }

  if (req.errorCode != OK) {
    LOG(ERROR) << "Transfer request error " << errorCodeToStr(req.errorCode);
    return req.errorCode;
  }
  // Protocol issues will/should be flagged as error when we call createSender

  // try to create sender
  SenderPtr sender;
  auto wdtController = WdtResourceController::get();
  // TODO should be using recoverid
  const std::string secondKey = req.hostName;
  ErrorCode errCode =
      wdtController->createSender(wdtNamespace, secondKey, req, sender);
  if (errCode == ALREADY_EXISTS && terminateExistingOne) {
    LOG(WARNING) << "Found pre-existing sender for " << wdtNamespace << " "
                 << secondKey << " aborting it and making a new one";
    sender->abort(ABORTED_BY_APPLICATION);
    // This may log an error too
    wdtController->releaseSender(wdtNamespace, secondKey);
    // Try#2
    errCode = wdtController->createSender(wdtNamespace, secondKey, req, sender);
  }
  if (errCode != OK) {
    LOG(ERROR) << "Failed to create sender " << errorCodeToStr(errCode) << " "
               << wdtNamespace << " " << secondKey;
    return errCode;
  }
  wdtSetAbortSocketCreatorAndReporter(wdtNamespace, sender.get(), req,
                                      abortChecker);

  auto transferReport = sender->transfer();
  ErrorCode ret = transferReport->getSummary().getErrorCode();
  wdtController->releaseSender(wdtNamespace, secondKey);
  LOG(INFO) << "wdtSend for " << wdtNamespace << " " << secondKey << " "
            << " ended with " << errorCodeToStr(ret);
  return ret;
}
Ejemplo n.º 7
0
string WdtTransferRequest::generateUrl(bool genFull) const {
  if (errorCode == ERROR || errorCode == URI_PARSE_ERROR) {
    LOG(ERROR) << "Transfer request has errors present ";
    return errorCodeToStr(errorCode);
  }
  WdtUri wdtUri;
  wdtUri.setHostName(hostName);
  wdtUri.setQueryParam(TRANSFER_ID_PARAM, transferId);
  wdtUri.setQueryParam(PROTOCOL_VERSION_PARAM,
                       folly::to<string>(protocolVersion));
  wdtUri.setQueryParam(PORTS_PARAM, getSerializedPortsList());
  if (genFull) {
    wdtUri.setQueryParam(DIRECTORY_PARAM, directory);
  }
  return wdtUri.generateUrl();
}
Ejemplo n.º 8
0
void SenderThread::start() {
  Clock::time_point startTime = Clock::now();

  if (buf_ == nullptr) {
    WTLOG(ERROR) << "Unable to allocate buffer";
    threadStats_.setLocalErrorCode(MEMORY_ALLOCATION_ERROR);
    return;
  }

  setFooterType();

  controller_->executeAtStart([&]() { wdtParent_->startNewTransfer(); });
  SenderState state = CONNECT;

  while (state != END) {
    ErrorCode abortCode = getThreadAbortCode();
    if (abortCode != OK) {
      WTLOG(ERROR) << "Transfer aborted " << errorCodeToStr(abortCode);
      threadStats_.setLocalErrorCode(ABORT);
      if (abortCode == VERSION_MISMATCH) {
        state = PROCESS_VERSION_MISMATCH;
      } else {
        break;
      }
    }
    state = (this->*stateMap_[state])();
  }

  EncryptionType encryptionType =
      (socket_ ? socket_->getEncryptionType() : ENC_NONE);
  threadStats_.setEncryptionType(encryptionType);
  double totalTime = durationSeconds(Clock::now() - startTime);
  WTLOG(INFO) << "Port " << port_ << " done. " << threadStats_
              << " Total throughput = "
              << threadStats_.getEffectiveTotalBytes() / totalTime / kMbToB
              << " Mbytes/sec";

  ThreadTransferHistory &transferHistory = getTransferHistory();
  transferHistory.markNotInUse();
  controller_->deRegisterThread(threadIndex_);
  controller_->executeAtEnd([&]() { wdtParent_->endCurTransfer(); });
  // Important to delete the socket before the thread dies for sub class
  // of clientsocket which have thread local data
  socket_ = nullptr;

  return;
}
Ejemplo n.º 9
0
SenderState SenderThread::connect() {
  WTVLOG(1) << "entered CONNECT state";
  if (socket_) {
    ErrorCode socketErrCode = socket_->getNonRetryableErrCode();
    if (socketErrCode != OK) {
      WTLOG(ERROR) << "Socket has non-retryable error "
                   << errorCodeToStr(socketErrCode);
      threadStats_.setLocalErrorCode(socketErrCode);
      return END;
    }
    socket_->closeNoCheck();
  }
  if (numReconnectWithoutProgress_ >= options_.max_transfer_retries) {
    WTLOG(ERROR) << "Sender thread reconnected " << numReconnectWithoutProgress_
                 << " times without making any progress, giving up. port: "
                 << socket_->getPort();
    threadStats_.setLocalErrorCode(NO_PROGRESS);
    return END;
  }
  ErrorCode code;
  // TODO cleanup more but for now avoid having 2 socket object live per port
  socket_ = nullptr;
  socket_ = connectToReceiver(port_, threadCtx_->getAbortChecker(), code);
  if (code == ABORT) {
    threadStats_.setLocalErrorCode(ABORT);
    if (getThreadAbortCode() == VERSION_MISMATCH) {
      return PROCESS_VERSION_MISMATCH;
    }
    return END;
  }
  if (code != OK) {
    threadStats_.setLocalErrorCode(code);
    return END;
  }
  auto nextState = SEND_SETTINGS;
  if (threadStats_.getLocalErrorCode() != OK) {
    nextState = READ_LOCAL_CHECKPOINT;
  }
  // resetting the status of thread
  reset();
  return nextState;
}
Ejemplo n.º 10
0
SenderState SenderThread::processDoneCmd() {
  WTVLOG(1) << "entered PROCESS_DONE_CMD state";
  // DONE cmd implies that all the blocks sent till now is acked
  ThreadTransferHistory &transferHistory = getTransferHistory();
  transferHistory.markAllAcknowledged();

  // send ack for DONE
  buf_[0] = Protocol::DONE_CMD;
  socket_->write(buf_, 1);

  socket_->shutdownWrites();
  ErrorCode retCode = socket_->expectEndOfStream();
  if (retCode != OK) {
    WTLOG(WARNING) << "Logical EOF not found when expected "
                   << errorCodeToStr(retCode);
    threadStats_.setLocalErrorCode(retCode);
    return CONNECT;
  }
  WTVLOG(1) << "done with transfer, port " << port_;
  return END;
}
Ejemplo n.º 11
0
SenderState SenderThread::processAbortCmd() {
  WTLOG(INFO) << "entered PROCESS_ABORT_CMD state ";
  ThreadTransferHistory &transferHistory = getTransferHistory();
  threadStats_.setLocalErrorCode(ABORT);
  int toRead = Protocol::kAbortLength;
  auto numRead = socket_->read(buf_, toRead);
  if (numRead != toRead) {
    // can not read checkpoint, but still must exit because of ABORT
    WTLOG(ERROR) << "Error while trying to read ABORT cmd " << numRead << " "
                 << toRead;
    return END;
  }
  int64_t offset = 0;
  int32_t negotiatedProtocol;
  ErrorCode remoteError;
  int64_t checkpoint;
  Protocol::decodeAbort(buf_, offset, bufSize_, negotiatedProtocol, remoteError,
                        checkpoint);
  threadStats_.setRemoteErrorCode(remoteError);
  std::string failedFileName = transferHistory.getSourceId(checkpoint);
  WTLOG(WARNING) << "Received abort on "
                 << " remote protocol version " << negotiatedProtocol
                 << " remote error code " << errorCodeToStr(remoteError)
                 << " file " << failedFileName << " checkpoint " << checkpoint;
  wdtParent_->abort(remoteError);
  if (remoteError == VERSION_MISMATCH) {
    if (Protocol::negotiateProtocol(
            negotiatedProtocol, threadProtocolVersion_) == negotiatedProtocol) {
      // sender can support this negotiated version
      negotiatedProtocol_ = negotiatedProtocol;
      return PROCESS_VERSION_MISMATCH;
    } else {
      WTLOG(ERROR) << "Sender can not support receiver version "
                   << negotiatedProtocol;
      threadStats_.setRemoteErrorCode(VERSION_INCOMPATIBLE);
    }
  }
  return END;
}
Ejemplo n.º 12
0
ErrorCode TransferLogManager::parseVerifyAndFix(
    const std::string &recoveryId, int64_t config, bool parseOnly,
    std::vector<FileChunksInfo> &parsedInfo) {
  if (fd_ < 0) {
    return INVALID_LOG;
  }
  LogParser parser(options_, encoderDecoder_, rootDir_, recoveryId, config,
                   parseOnly);
  resumptionStatus_ = parser.parseLog(fd_, senderIp_, parsedInfo);
  if (resumptionStatus_ == INVALID_LOG) {
    // leave the log, but close it. Keeping the invalid log ensures that the
    // directory remains invalid. Closing the log means that nothing else can
    // be written to it again
    closeLog();
  } else if (resumptionStatus_ == INCONSISTENT_DIRECTORY) {
    if (!parseOnly) {
      // if we are only parsing, we should not modify anything
      invalidateDirectory();
    }
  }
  LOG(INFO) << "Transfer log parsing finished "
            << errorCodeToStr(resumptionStatus_);
  return resumptionStatus_;
}
Ejemplo n.º 13
0
const WdtTransferRequest &Receiver::init() {
  if (validateTransferRequest() != OK) {
    WLOG(ERROR) << "Couldn't validate the transfer request "
                << transferRequest_.getLogSafeString();
    return transferRequest_;
  }
  checkAndUpdateBufferSize();
  backlog_ = options_.backlog;
  if (getTransferId().empty()) {
    setTransferId(WdtBase::generateTransferId());
  }
  setProtocolVersion(transferRequest_.protocolVersion);
  setDir(transferRequest_.directory);
  auto numThreads = transferRequest_.ports.size();
  // This creates the destination directory (which is needed for transferLogMgr)
  fileCreator_.reset(new FileCreator(destDir_, numThreads, transferLogManager_,
                                     options_.skip_writes));
  // Make sure we can get the lock on the transfer log manager early
  // so if we can't we don't generate a valid but useless url and end up
  // starting a sender doomed to fail
  if (options_.enable_download_resumption) {
    WDT_CHECK(!options_.skip_writes)
        << "Can not skip transfers with download resumption turned on";
    if (options_.resume_using_dir_tree) {
      WDT_CHECK(!options_.shouldPreallocateFiles())
          << "Can not resume using directory tree if preallocation is enabled";
    }
    ErrorCode errCode = transferLogManager_.openLog();
    if (errCode != OK) {
      WLOG(ERROR) << "Failed to open transfer log " << errorCodeToStr(errCode);
      transferRequest_.errorCode = errCode;
      return transferRequest_;
    }
    ErrorCode code = transferLogManager_.parseAndMatch(
        recoveryId_, getTransferConfig(), fileChunksInfo_);
    if (code == OK && options_.resume_using_dir_tree) {
      WDT_CHECK(fileChunksInfo_.empty());
      traverseDestinationDir(fileChunksInfo_);
    }
  }

  EncryptionType encryptionType = parseEncryptionType(options_.encryption_type);
  // is encryption enabled?
  bool encrypt = (encryptionType != ENC_NONE &&
                  protocolVersion_ >= Protocol::ENCRYPTION_V1_VERSION);
  if (encrypt) {
    WLOG(INFO) << encryptionTypeToStr(encryptionType)
               << " encryption is enabled for this transfer ";
    if (!transferRequest_.encryptionData.isSet()) {
      WLOG(INFO) << "Receiver generating encryption key for type "
                 << encryptionTypeToStr(encryptionType);
      transferRequest_.encryptionData =
          EncryptionParams::generateEncryptionParams(encryptionType);
    }
    if (!transferRequest_.encryptionData.isSet()) {
      WLOG(ERROR) << "Unable to generate encryption key for type "
                  << encryptionTypeToStr(encryptionType);
      transferRequest_.errorCode = ENCRYPTION_ERROR;
      return transferRequest_;
    }
  } else {
    if (encryptionType != ENC_NONE) {
      WLOG(WARNING) << "Encryption is enabled, but protocol version is "
                    << protocolVersion_
                    << ", minimum version required for encryption is "
                    << Protocol::ENCRYPTION_V1_VERSION;
    }
    transferRequest_.encryptionData.erase();
  }
  threadsController_ = new ThreadsController(numThreads);
  threadsController_->setNumFunnels(ReceiverThread::NUM_FUNNELS);
  threadsController_->setNumBarriers(ReceiverThread::NUM_BARRIERS);
  threadsController_->setNumConditions(ReceiverThread::NUM_CONDITIONS);
  // TODO: take transferRequest directly !
  receiverThreads_ = threadsController_->makeThreads<Receiver, ReceiverThread>(
      this, transferRequest_.ports.size(), transferRequest_.ports);
  size_t numSuccessfulInitThreads = 0;
  for (auto &receiverThread : receiverThreads_) {
    ErrorCode code = receiverThread->init();
    if (code == OK) {
      ++numSuccessfulInitThreads;
    }
  }
  WLOG(INFO) << "Registered " << numSuccessfulInitThreads
             << " successful sockets";
  ErrorCode code = OK;
  const size_t targetSize = transferRequest_.ports.size();
  // TODO: replace with getNumPorts/thread
  if (numSuccessfulInitThreads != targetSize) {
    code = FEWER_PORTS;
    if (numSuccessfulInitThreads == 0) {
      code = ERROR;
    }
  }
  transferRequest_.protocolVersion = protocolVersion_;

  transferRequest_.ports.clear();
  for (const auto &receiverThread : receiverThreads_) {
    transferRequest_.ports.push_back(receiverThread->getPort());
  }

  if (transferRequest_.hostName.empty()) {
    char hostName[1024];
    int ret = gethostname(hostName, sizeof(hostName));
    if (ret == 0) {
      transferRequest_.hostName.assign(hostName);
    } else {
      PLOG(ERROR) << "Couldn't find the host name";
      code = ERROR;
    }
  }
  transferRequest_.directory = getDir();
  transferRequest_.errorCode = code;
  return transferRequest_;
}