Example #1
StatusWith<OplogFetcher::DocumentsInfo> OplogFetcher::validateDocuments(
    const Fetcher::Documents& documents, bool first, Timestamp lastTS) {
    if (first && documents.empty()) {
        return Status(ErrorCodes::OplogStartMissing,
                      str::stream() << "The first batch of oplog entries is empty, but expected at "
                                       "least 1 document matching ts: "
                                    << lastTS.toString());
    }

    DocumentsInfo info;
    // The count of the bytes of the documents read off the network.
    info.networkDocumentBytes = 0;
    info.networkDocumentCount = 0;
    for (auto&& doc : documents) {
        info.networkDocumentBytes += doc.objsize();
        ++info.networkDocumentCount;

        // If this is the first response (to the $gte query) then we already applied the first doc.
        if (first && info.networkDocumentCount == 1U) {
            continue;
        }

        // Check to see if the oplog entry goes back in time for this document.
        const auto docOpTime = OpTime::parseFromOplogEntry(doc);
        // entries must have a "ts" field.
        if (!docOpTime.isOK()) {
            return docOpTime.getStatus();
        }

        info.lastDocument = {doc["h"].numberLong(), docOpTime.getValue()};

        const auto docTS = info.lastDocument.opTime.getTimestamp();
        if (lastTS >= docTS) {
            return Status(ErrorCodes::OplogOutOfOrder,
                          str::stream() << "Out of order entries in oplog. lastTS: "
                                        << lastTS.toString()
                                        << " outOfOrderTS:"
                                        << docTS.toString()
                                        << " in batch with "
                                        << info.networkDocumentCount
                                        << "docs; first-batch:"
                                        << first
                                        << ", doc:"
                                        << doc);
        }
        lastTS = docTS;
    }

    // These numbers are for the documents we will apply.
    info.toApplyDocumentCount = documents.size();
    info.toApplyDocumentBytes = info.networkDocumentBytes;
    if (first) {
        // The count is one less since the first document found was already applied ($gte $ts query)
        // and we will not apply it again.
        --info.toApplyDocumentCount;
        auto alreadyAppliedDocument = documents.cbegin();
        info.toApplyDocumentBytes -= alreadyAppliedDocument->objsize();
    }
    return info;
}
Example #2
void EchoServer::onMessage(const TcpConnectionPtr& conn,
                           Buffer* buf,
                           Timestamp time) {
    string msg(buf->retrieveAllAsString());
    LOG_INFO << conn->name() << " echo " << msg.size() << " bytes at " << time.toString();
    conn->send(msg);
}
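For context, here is a minimal sketch of how a message callback like onMessage() above is usually registered, assuming the muduo networking library; the port number, server name, and the use of a lambda are illustrative choices, not taken from the original code base.

#include <muduo/base/Logging.h>
#include <muduo/net/EventLoop.h>
#include <muduo/net/InetAddress.h>
#include <muduo/net/TcpServer.h>

using namespace muduo;
using namespace muduo::net;

int main()
{
    EventLoop loop;
    TcpServer server(&loop, InetAddress(2007), "EchoServer");  // port 2007 is arbitrary

    // Echo back whatever arrives, mirroring EchoServer::onMessage() above.
    server.setMessageCallback(
        [](const TcpConnectionPtr& conn, Buffer* buf, Timestamp time) {
            string msg(buf->retrieveAllAsString());
            LOG_INFO << conn->name() << " echo " << msg.size() << " bytes at " << time.toString();
            conn->send(msg);
        });

    server.start();
    loop.loop();  // runs until the process is terminated
    return 0;
}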
Example #3
void clientMessageCallback(const TcpConnectionPtr& conn, ByteBuffer* buf, Timestamp receiveTime)
{
    string msg(buf->retrieveAllAsString());
    //cout << "EchoServer::onMessage, fd [" << conn->fd() << "], " << msg.size() << " bytes, ["
    //    << msg.data() << "], received at " << time.toString() << "\n";
    cout << "client recv data : " << msg << " at " << receiveTime.toString() << "\n";
    //conn->send(msg.data(), msg.size());
}
Example #4
void WiredTigerRecoveryUnit::setPrepareTimestamp(Timestamp timestamp) {
    invariant(_inUnitOfWork(), toString(_state));
    invariant(_prepareTimestamp.isNull(),
              str::stream() << "Trying to set prepare timestamp to " << timestamp.toString()
                            << ". It's already set to "
                            << _prepareTimestamp.toString());
    invariant(_commitTimestamp.isNull(),
              str::stream() << "Commit timestamp is " << _commitTimestamp.toString()
                            << " and trying to set prepare timestamp to "
                            << timestamp.toString());
    invariant(!_lastTimestampSet,
              str::stream() << "Last timestamp set is " << _lastTimestampSet->toString()
                            << " and trying to set prepare timestamp to "
                            << timestamp.toString());

    _prepareTimestamp = timestamp;
}
Example #5
int main(int argc, char const *argv[])
{
	Timestamp now = Timestamp::now();

	cout << now.toString() << endl;
	cout << now.toFormatString() << endl;
	return 0;
}
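None of the examples above include the Timestamp class itself. Below is a minimal, self-contained sketch of what such a value type might look like, modeled on the usage in these snippets (microsecond resolution, now(), toString(), toFormatString(), valid()); the exact output formats and the POSIX gmtime_r call are assumptions, not the real implementation.

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <ctime>
#include <string>

// Sketch of a Timestamp value type: a single microsecond counter since the
// Unix epoch, cheap to copy and compare.
class Timestamp {
public:
    explicit Timestamp(int64_t microSecondsSinceEpoch = 0)
        : microSecondsSinceEpoch_(microSecondsSinceEpoch) {}

    static Timestamp now() {
        using namespace std::chrono;
        return Timestamp(
            duration_cast<microseconds>(system_clock::now().time_since_epoch()).count());
    }

    bool valid() const { return microSecondsSinceEpoch_ > 0; }

    // "seconds.microseconds", e.g. "1700000000.123456"
    std::string toString() const {
        char buf[32];
        snprintf(buf, sizeof buf, "%lld.%06lld",
                 static_cast<long long>(microSecondsSinceEpoch_ / 1000000),
                 static_cast<long long>(microSecondsSinceEpoch_ % 1000000));
        return buf;
    }

    // Human-readable UTC form, e.g. "20240101 12:34:56.123456"
    std::string toFormatString() const {
        time_t seconds = static_cast<time_t>(microSecondsSinceEpoch_ / 1000000);
        struct tm tm_time;
        gmtime_r(&seconds, &tm_time);  // POSIX; use gmtime_s on Windows
        char buf[64];
        snprintf(buf, sizeof buf, "%4d%02d%02d %02d:%02d:%02d.%06lld",
                 tm_time.tm_year + 1900, tm_time.tm_mon + 1, tm_time.tm_mday,
                 tm_time.tm_hour, tm_time.tm_min, tm_time.tm_sec,
                 static_cast<long long>(microSecondsSinceEpoch_ % 1000000));
        return buf;
    }

private:
    int64_t microSecondsSinceEpoch_;
};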
Example #6
void WiredTigerRecoveryUnit::setCommitTimestamp(Timestamp timestamp) {
    // This can be called either outside of a WriteUnitOfWork or in a prepared transaction after
    // setPrepareTimestamp() is called. Prepared transactions ensure the correct timestamping
    // semantics and the set-once commitTimestamp behavior is exactly what prepared transactions
    // want.
    invariant(!_inUnitOfWork() || !_prepareTimestamp.isNull(), toString(_state));
    invariant(_commitTimestamp.isNull(),
              str::stream() << "Commit timestamp set to " << _commitTimestamp.toString()
                            << " and trying to set it to "
                            << timestamp.toString());
    invariant(!_lastTimestampSet,
              str::stream() << "Last timestamp set is " << _lastTimestampSet->toString()
                            << " and trying to set commit timestamp to "
                            << timestamp.toString());
    invariant(!_isTimestamped);

    _commitTimestamp = timestamp;
}
Example #7
void readTimerfd(int timerfd, Timestamp now)
{
    uint64_t howmany;
    ssize_t n = ::read(timerfd, &howmany, sizeof howmany);
    LOG_TRACE << "TimerQueue::handleRead() " << howmany << " at " << now.toString();
    if (n != sizeof howmany)
    {
        LOG_ERROR << "TimerQueue::handleRead() reads " << n << " bytes instead of 8";
    }
}
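For context, a sketch of how a timerfd like the one read above is typically created and armed. timerfd_create() and timerfd_settime() are the standard Linux calls; the five-second interval, the flags, and the helper name are illustrative only.

#include <sys/timerfd.h>
#include <unistd.h>
#include <cstdio>
#include <cstring>

// Create a non-blocking timerfd that fires every five seconds; readTimerfd()
// above then drains the expiration count with ::read().
int createAndArmTimerfd()
{
    int timerfd = ::timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
    if (timerfd < 0)
    {
        perror("timerfd_create");
        return -1;
    }

    struct itimerspec newValue;
    memset(&newValue, 0, sizeof newValue);
    newValue.it_value.tv_sec = 5;     // first expiration after 5 seconds
    newValue.it_interval.tv_sec = 5;  // then every 5 seconds
    if (::timerfd_settime(timerfd, 0, &newValue, nullptr) < 0)
    {
        perror("timerfd_settime");
        ::close(timerfd);
        return -1;
    }
    return timerfd;
}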
Example #8
void sendDataByClient()
{
    static int64_t i = 0;
    if (clientConnection)
    {
        Timestamp now = Timestamp::now();
        char str[] = "hello world";
        cout << "client send data : " << str << " at " << now.toString() << "\n";
        clientConnection->send(str, sizeof(str) - 1);  // exclude the trailing '\0'
    }
}
Example #9
void EventLoop::loop()
{
    assertInLoopThread();
    running_ = true;

    Timestamp now;
    while (running_)
    {
        activeChannels_.clear();

        int timeoutMs = 0;
        now = Timestamp::now();
        Timestamp nextExpired = timerQueue_->getNearestExpiration();
        if (nextExpired.valid())
        {
            double seconds = Timestamp::timeDiff(nextExpired, now);
            LOG_INFO("nextExpired.valid() [%s][%s][%lf]", nextExpired.toString().c_str(), now.toString().c_str(), seconds);
            if (seconds <= 0)
                timeoutMs = 0;
            else
                timeoutMs = static_cast<int>(seconds * 1000);
        }
        else
        {
        #if defined(POLL_WAIT_INDEFINITE)
            timeoutMs = -1;
        #else
            timeoutMs = 0;
        #endif
        }

        now = poller_->poll_once(timeoutMs, activeChannels_);
        //LOG_INFO("EventLoop::loop [%s][%d]", now.toString().c_str(), activeChannels_.size());

        eventHandling_ = true;
        for (ChannelList::iterator it = activeChannels_.begin(); it != activeChannels_.end(); ++it)
        {
            currentActiveChannel_ = *it;
            currentActiveChannel_->handleEvent(now);
        }
        currentActiveChannel_ = NULL;
        eventHandling_ = false;

        timerQueue_->runTimer(now);

        callPendingFunctors();    // run callbacks queued while the loop was blocked in poll()
    }
}
Example #10
Status WiredTigerRecoveryUnit::setTimestamp(Timestamp timestamp) {
    _ensureSession();
    LOG(3) << "WT set timestamp of future write operations to " << timestamp;
    WT_SESSION* session = _session->getSession();
    invariant(_inUnitOfWork(), toString(_state));
    invariant(_prepareTimestamp.isNull());
    invariant(_commitTimestamp.isNull(),
              str::stream() << "Commit timestamp set to " << _commitTimestamp.toString()
                            << " and trying to set WUOW timestamp to "
                            << timestamp.toString());

    _lastTimestampSet = timestamp;

    // Starts the WT transaction associated with this session.
    getSession();

    const std::string conf = "commit_timestamp=" + integerToHex(timestamp.asULL());
    auto rc = session->timestamp_transaction(session, conf.c_str());
    if (rc == 0) {
        _isTimestamped = true;
    }
    return wtRCToStatus(rc, "timestamp_transaction");
}
Example #11
void XMLDoc::setValue(const XMLNodePtr &node, const TCHAR *tagName, const Timestamp &value, bool force) {
  setValue(node, tagName, value.toString(ddMMyyyyhhmmss), force);
}
Example #12
void BackgroundSync::_fetcherCallback(const StatusWith<Fetcher::QueryResponse>& result,
                                      BSONObjBuilder* bob,
                                      const HostAndPort& source,
                                      OpTime lastOpTimeFetched,
                                      long long lastFetchedHash,
                                      Milliseconds fetcherMaxTimeMS,
                                      Status* returnStatus) {
    // if target cut connections between connecting and querying (for
    // example, because it stepped down) we might not have a cursor
    if (!result.isOK()) {
        LOG(2) << "Error returned from oplog query: " << result.getStatus();
        *returnStatus = result.getStatus();
        return;
    }

    if (inShutdown()) {
        LOG(2) << "Interrupted by shutdown while querying oplog. 1";  // 1st instance.
        return;
    }

    // Check if we have been stopped.
    if (isStopped()) {
        LOG(2) << "Interrupted by stop request while querying the oplog. 1";  // 1st instance.
        return;
    }

    const auto& queryResponse = result.getValue();
    bool syncSourceHasSyncSource = false;
    OpTime sourcesLastOpTime;

    // Forward metadata (containing liveness information) to replication coordinator.
    bool receivedMetadata =
        queryResponse.otherFields.metadata.hasElement(rpc::kReplSetMetadataFieldName);
    if (receivedMetadata) {
        auto metadataResult =
            rpc::ReplSetMetadata::readFromMetadata(queryResponse.otherFields.metadata);
        if (!metadataResult.isOK()) {
            error() << "invalid replication metadata from sync source " << source << ": "
                    << metadataResult.getStatus() << ": " << queryResponse.otherFields.metadata;
            return;
        }
        const auto& metadata = metadataResult.getValue();
        _replCoord->processReplSetMetadata(metadata);
        if (metadata.getPrimaryIndex() != rpc::ReplSetMetadata::kNoPrimary) {
            _replCoord->cancelAndRescheduleElectionTimeout();
        }
        syncSourceHasSyncSource = metadata.getSyncSourceIndex() != -1;
        sourcesLastOpTime = metadata.getLastOpVisible();
    }

    const auto& documents = queryResponse.documents;
    auto firstDocToApply = documents.cbegin();
    auto lastDocToApply = documents.cend();

    if (!documents.empty()) {
        LOG(2) << "fetcher read " << documents.size()
               << " operations from remote oplog starting at " << documents.front()["ts"]
               << " and ending at " << documents.back()["ts"];
    } else {
        LOG(2) << "fetcher read 0 operations from remote oplog";
    }

    // Check start of remote oplog and, if necessary, stop fetcher to execute rollback.
    if (queryResponse.first) {
        auto getNextOperation = [&firstDocToApply, lastDocToApply]() -> StatusWith<BSONObj> {
            if (firstDocToApply == lastDocToApply) {
                return Status(ErrorCodes::OplogStartMissing, "remote oplog start missing");
            }
            return *(firstDocToApply++);
        };

        *returnStatus = checkRemoteOplogStart(getNextOperation, lastOpTimeFetched, lastFetchedHash);
        if (!returnStatus->isOK()) {
            // Stop fetcher and execute rollback.
            return;
        }

        // If this is the first batch and no rollback is needed, we should have advanced
        // the document iterator.
        invariant(firstDocToApply != documents.cbegin());
    }

    // No work to do if we are draining/primary.
    if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary()) {
        LOG(2) << "Interrupted by waiting for applier to drain "
               << "or becoming primary while querying the oplog. 1";  // 1st instance.
        return;
    }

    // The count of the bytes of the documents read off the network.
    int networkDocumentBytes = 0;
    Timestamp lastTS;
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        // If we are stopped then return without queueing this batch to apply.
        if (_stopped) {
            LOG(2) << "Interrupted by stop request while querying the oplog. 2";  // 2nd instance.
            return;
        }
        lastTS = _lastOpTimeFetched.getTimestamp();
    }
    int count = 0;
    for (auto&& doc : documents) {
        networkDocumentBytes += doc.objsize();
        ++count;

        // If this is the first response (to the $gte query) then we already applied the first doc.
        if (queryResponse.first && count == 1) {
            continue;
        }

        // Check to see if the oplog entry goes back in time for this document.
        const auto docOpTime = OpTime::parseFromOplogEntry(doc);
        fassertStatusOK(34362, docOpTime.getStatus());  // entries must have a "ts" field.
        const auto docTS = docOpTime.getValue().getTimestamp();

        if (lastTS >= docTS) {
            *returnStatus = Status(
                ErrorCodes::OplogOutOfOrder,
                str::stream() << "Reading the oplog from" << source.toString()
                              << " returned out of order entries. lastTS: " << lastTS.toString()
                              << " outOfOrderTS:" << docTS.toString() << " at count:" << count);
            return;
        }
        lastTS = docTS;
    }

    // These numbers are for the documents we will apply.
    auto toApplyDocumentCount = documents.size();
    auto toApplyDocumentBytes = networkDocumentBytes;
    if (queryResponse.first) {
        // The count is one less since the first document found was already applied ($gte $ts query)
        // and we will not apply it again. We just needed to check it so we didn't rollback, or
        // error above.
        --toApplyDocumentCount;
        const auto alreadyAppliedDocument = documents.cbegin();
        toApplyDocumentBytes -= alreadyAppliedDocument->objsize();
    }

    if (toApplyDocumentBytes > 0) {
        // Wait for enough space.
        _buffer.waitForSpace(toApplyDocumentBytes);

        OCCASIONALLY {
            LOG(2) << "bgsync buffer has " << _buffer.size() << " bytes";
        }

        // Buffer docs for later application.
        std::vector<BSONObj> objs{firstDocToApply, lastDocToApply};
        _buffer.pushAllNonBlocking(objs);

        // Inc stats.
        opsReadStats.increment(documents.size());  // we read all of the docs in the query.
        networkByteStats.increment(networkDocumentBytes);
        bufferCountGauge.increment(toApplyDocumentCount);
        bufferSizeGauge.increment(toApplyDocumentBytes);

        // Update last fetched info.
        auto lastDoc = objs.back();
        {
            stdx::unique_lock<stdx::mutex> lock(_mutex);
            _lastFetchedHash = lastDoc["h"].numberLong();
            _lastOpTimeFetched = fassertStatusOK(28770, OpTime::parseFromOplogEntry(lastDoc));
            LOG(3) << "batch resetting _lastOpTimeFetched: " << _lastOpTimeFetched;
        }
    }

    // The remainder of the function is not included in this excerpt.
}
Example #13
void passByConstReference(const Timestamp& t)
{
	printf("%s\n", t.toString().c_str());
}
Example #14
void passByValue(Timestamp t)
{
	printf("%s\n", t.toString().c_str());
}
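Examples #13 and #14 contrast pass-by-const-reference with pass-by-value. If Timestamp holds a single 64-bit counter (as in the sketch after Example #5), the two cost roughly the same, which is why such small value types are commonly passed by value. A tiny driver, assuming that sketch and the two functions above:

int main()
{
    Timestamp t = Timestamp::now();
    passByValue(t);            // copies one int64_t
    passByConstReference(t);   // passes a reference (pointer-sized)
    return 0;
}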
Example #15
void onMessage(const TcpConnectionPtr& conn, ByteBuffer* buf, Timestamp time)
{
    string msg(buf->retrieveAllAsString());
    printf("onMessage(): recv a message [%s]\n", msg.c_str());
    LOG_INFO("[%d] recv %zu bytes at %s", conn->fd(), msg.size(), time.toString().c_str());
}
Example #16
void ReplicationRecoveryImpl::_applyToEndOfOplog(OperationContext* opCtx,
                                                 const Timestamp& oplogApplicationStartPoint,
                                                 const Timestamp& topOfOplog) {
    invariant(!oplogApplicationStartPoint.isNull());
    invariant(!topOfOplog.isNull());

    // Check if we have any unapplied ops in our oplog. It is important that this is done after
    // deleting the ragged end of the oplog.
    if (oplogApplicationStartPoint == topOfOplog) {
        log() << "No oplog entries to apply for recovery. Start point is at the top of the oplog.";
        return;  // We've applied all the valid oplog we have.
    } else if (oplogApplicationStartPoint > topOfOplog) {
        severe() << "Applied op " << oplogApplicationStartPoint.toBSON()
                 << " not found. Top of oplog is " << topOfOplog.toBSON() << '.';
        fassertFailedNoTrace(40313);
    }

    log() << "Replaying stored operations from " << oplogApplicationStartPoint.toBSON()
          << " (exclusive) to " << topOfOplog.toBSON() << " (inclusive).";

    OplogBufferLocalOplog oplogBuffer(oplogApplicationStartPoint);
    oplogBuffer.startup(opCtx);

    RecoveryOplogApplierStats stats;

    auto writerPool = OplogApplier::makeWriterPool();
    OplogApplier::Options options;
    options.allowNamespaceNotFoundErrorsOnCrudOps = true;
    options.skipWritesToOplog = true;
    // During replication recovery, the stableTimestampForRecovery refers to the stable timestamp
    // from which we replay the oplog.
    // For startup recovery, this will be the recovery timestamp, which is the stable timestamp that
    // the storage engine recovered to on startup. For rollback recovery, this will be the last
    // stable timestamp, returned when we call recoverToStableTimestamp.
    // We keep track of this for prepared transactions so that when we apply a commitTransaction
    // oplog entry, we can check if it occurs before or after the stable timestamp and decide
    // whether the operations would have already been reflected in the data.
    options.stableTimestampForRecovery = oplogApplicationStartPoint;
    OplogApplierImpl oplogApplier(nullptr,
                                  &oplogBuffer,
                                  &stats,
                                  nullptr,
                                  _consistencyMarkers,
                                  _storageInterface,
                                  options,
                                  writerPool.get());

    OplogApplier::BatchLimits batchLimits;
    batchLimits.bytes = OplogApplier::calculateBatchLimitBytes(opCtx, _storageInterface);
    batchLimits.ops = OplogApplier::getBatchLimitOperations();

    OpTime applyThroughOpTime;
    OplogApplier::Operations batch;
    while (
        !(batch = fassert(50763, oplogApplier.getNextApplierBatch(opCtx, batchLimits))).empty()) {
        applyThroughOpTime = uassertStatusOK(oplogApplier.multiApply(opCtx, std::move(batch)));
    }
    stats.complete(applyThroughOpTime);
    invariant(oplogBuffer.isEmpty(),
              str::stream() << "Oplog buffer not empty after applying operations. Last operation "
                               "applied with optime: "
                            << applyThroughOpTime.toBSON());
    invariant(applyThroughOpTime.getTimestamp() == topOfOplog,
              str::stream() << "Did not apply to top of oplog. Applied through: "
                            << applyThroughOpTime.toString()
                            << ". Top of oplog: "
                            << topOfOplog.toString());
    oplogBuffer.shutdown(opCtx);

    // We may crash before setting appliedThrough. If we have a stable checkpoint, we will recover
    // to that checkpoint at a replication consistent point, and applying the oplog is safe.
    // If we don't have a stable checkpoint, then we must be in startup recovery, and not rollback
    // recovery, because we only roll back to a stable timestamp when we have a stable checkpoint.
    // Startup recovery from an unstable checkpoint only ever applies a single batch and it is safe
    // to replay the batch from any point.
    _consistencyMarkers->setAppliedThrough(opCtx, applyThroughOpTime);
}
Example #17
void Resolver::onRead(int sockfd, Timestamp t) {
    LOG_DEBUG << "onRead " << sockfd << " at " << t.toString();
    ares_process_fd(ctx_, sockfd, ARES_SOCKET_BAD);
}
Example #18
std::string
VirusOne::Services::Timestamp::now() {
  Timestamp t;
  return t.toString();
}