Beispiel #1
0
 bool getLastGTIDinOplog(GTID* gtid) {
     Client::ReadContext ctx(rsoplog);
     // TODO: Should this be using rsOplogDetails, verifying non-null?
     NamespaceDetails *d = nsdetails(rsoplog);
     shared_ptr<Cursor> c( BasicCursor::make(d, -1) );
     if (c->ok()) {
         *gtid = getGTIDFromOplogEntry(c->current());
         return true;
     }
     return false;
 }
Beispiel #2
0
 bool getLastGTIDinOplog(GTID* gtid) {
     LOCK_REASON(lockReason, "repl: looking up last GTID in oplog");
     Client::ReadContext ctx(rsoplog, lockReason);
     // TODO: Should this be using rsOplogDetails, verifying non-null?
     Collection *cl = getCollection(rsoplog);
     shared_ptr<Cursor> c( Cursor::make(cl, -1) );
     if (c->ok()) {
         *gtid = getGTIDFromOplogEntry(c->current());
         return true;
     }
     return false;
 }
Beispiel #3
0
    // returns number of seconds to sleep, if any
    uint32_t BackgroundSync::produce() {

        // normally msgCheckNewState gets called periodically, but in a single node repl set
        // there are no heartbeat threads, so we do it here to be sure.  this is relevant if the
        // singleton member has done a stepDown() and needs to come back up.
        if (theReplSet->config().members.size() == 1 &&
            theReplSet->myConfig().potentiallyHot()) {
            Manager* mgr = theReplSet->mgr;
            // When would mgr be null?  During replsettest'ing, in which case we should
            // fall through and actually apply ops as if we were a real secondary.
            if (mgr) {
                mgr->send(boost::bind(&Manager::msgCheckNewState, theReplSet->mgr));
                // There should never be ops to sync in a 1-member set, anyway
                return 1;
            }
        }

        OplogReader r(true /* doHandshake */);

        // find a target to sync from the last op time written
        getOplogReader(r);

        // no server found
        GTID lastGTIDFetched = theReplSet->gtidManager->getLiveState();
        {
            boost::unique_lock<boost::mutex> lock(_mutex);

            if (_currentSyncTarget == NULL) {
                // if there is no one to sync from
                return 1; //sleep one second
            }
        }
        r.tailingQueryGTE(rsoplog, lastGTIDFetched);

        // if target cut connections between connecting and querying (for
        // example, because it stepped down) we might not have a cursor
        if (!r.haveCursor()) {
            return 0;
        }

        try {
            // this method may actually run rollback, yes, the name is bad
            if (isRollbackRequired(r)) {
                // sleep 2 seconds and try again. (The 2 is arbitrary).
                // If we are not fatal, then we will keep trying to sync
                // from another machine
                return 2;
            }
        }
        catch (RollbackOplogException& re){
            // we attempted a rollback and failed, we must go fatal.
            log() << "Caught a RollbackOplogException during rollback, going fatal" << rsLog;
            theReplSet->fatal();
            return 2; // 2 is arbitrary, if we are going fatal, we are done
        }

        while (!_opSyncShouldExit) {
            while (!_opSyncShouldExit) {
                {
                    // check if we should bail out
                    boost::unique_lock<boost::mutex> lck(_mutex);
                    if (!_opSyncShouldRun) {
                        return 0;
                    }
                }
                if (!r.moreInCurrentBatch()) {
                    // check to see if we have a request to sync
                    // from a specific target. If so, get out so that
                    // we can restart the act of syncing and
                    // do so from the correct target
                    if (theReplSet->gotForceSync()) {
                        return 0;
                    }

                    verify(!theReplSet->isPrimary());

                    if (shouldChangeSyncTarget()) {
                        return 0;
                    }
                    //record time for each getmore
                    {
                        TimerHolder batchTimer(&getmoreReplStats);
                        r.more();
                    }
                    //increment
                    networkByteStats.increment(r.currentBatchMessageSize());

                }

                if (!r.more()) {
                    break;
                }

                // This is the operation we have received from the target
                // that we must put in our oplog with an applied field of false
                BSONObj o = r.nextSafe().getOwned();
                opsReadStats.increment();
                LOG(3) << "replicating " << o.toString(false, true) << " from " << _currentSyncTarget->fullName() << endl;
                uint64_t ts = o["ts"]._numberLong();

                // now that we have the element in o, let's check
                // if there a delay is required (via slaveDelay) before
                // writing it to the oplog
                if (theReplSet->myConfig().slaveDelay > 0) {
                    handleSlaveDelay(ts);
                    {
                        boost::unique_lock<boost::mutex> lck(_mutex);
                        if (!_opSyncShouldRun) {
                            break;
                        }
                    }
                }

                {
                    Timer timer;
                    bool bigTxn = false;
                    {
                        Client::Transaction transaction(DB_SERIALIZABLE);
                        replicateFullTransactionToOplog(o, r, &bigTxn);
                        // we are operating as a secondary. We don't have to fsync
                        transaction.commit(DB_TXN_NOSYNC);
                    }
                    {
                        GTID currEntry = getGTIDFromOplogEntry(o);
                        uint64_t lastHash = o["h"].numberLong();
                        boost::unique_lock<boost::mutex> lock(_mutex);
                        // update counters
                        theReplSet->gtidManager->noteGTIDAdded(currEntry, ts, lastHash);
                        // notify applier thread that data exists
                        if (_deque.size() == 0) {
                            _queueCond.notify_all();
                        }
                        _deque.push_back(o);
                        bufferCountGauge.increment();
                        bufferSizeGauge.increment(o.objsize());
                        // this is a flow control mechanism, with bad numbers
                        // hard coded for now just to get something going.
                        // If the opSync thread notices that we have over 20000
                        // transactions in the queue, it waits until we get below
                        // 10000. This is where we wait if we get too high
                        // Once we have spilling of transactions working, this
                        // logic will need to be redone
                        if (_deque.size() > 20000) {
                            _queueCond.wait(lock);
                        }
                        if (bigTxn) {
                            // if we have a large transaction, we don't want
                            // to let it pile up. We want to process it immedietely
                            // before processing anything else.
                            while (_deque.size() > 0) {
                                _queueDone.wait(lock);
                            }
                        }
                    }
                }
            } // end while

            if (shouldChangeSyncTarget()) {
                return 0;
            }

            r.tailCheck();
            if( !r.haveCursor() ) {
                LOG(1) << "replSet end opSync pass" << rsLog;
                return 0;
            }

            // looping back is ok because this is a tailable cursor
        }
        return 0;
    }
Beispiel #4
0
    void BackgroundSync::applyOpsFromOplog() {
        GTID lastLiveGTID;
        GTID lastUnappliedGTID;
        while (1) {
            try {
                BSONObj curr;
                {
                    boost::unique_lock<boost::mutex> lck(_mutex);
                    // wait until we know an item has been produced
                    while (_deque.size() == 0 && !_applierShouldExit) {
                        _queueDone.notify_all();
                        _queueCond.wait(lck);
                    }
                    if (_deque.size() == 0 && _applierShouldExit) {
                        return; 
                    }
                    curr = _deque.front();
                }
                GTID currEntry = getGTIDFromOplogEntry(curr);
                theReplSet->gtidManager->noteApplyingGTID(currEntry);
                // we must do applyTransactionFromOplog in a loop
                // because once we have called noteApplyingGTID, we must
                // continue until we are successful in applying the transaction.
                for (uint32_t numTries = 0; numTries <= 100; numTries++) {
                    try {
                        numTries++;
                        TimerHolder timer(&applyBatchStats);
                        applyTransactionFromOplog(curr);
                        opsAppliedStats.increment();
                        break;
                    }
                    catch (std::exception &e) {
                        log() << "exception during applying transaction from oplog: " << e.what() << endl;
                        log() << "oplog entry: " << curr.str() << endl;
                        if (numTries == 100) {
                            // something is really wrong if we fail 100 times, let's abort
                            ::abort();
                        }
                        sleepsecs(1);
                    }
                }
                LOG(3) << "applied " << curr.toString(false, true) << endl;
                theReplSet->gtidManager->noteGTIDApplied(currEntry);

                {
                    boost::unique_lock<boost::mutex> lck(_mutex);
                    dassert(_deque.size() > 0);
                    _deque.pop_front();
                    bufferCountGauge.increment(-1);
                    bufferSizeGauge.increment(-curr.objsize());
                    
                    // this is a flow control mechanism, with bad numbers
                    // hard coded for now just to get something going.
                    // If the opSync thread notices that we have over 20000
                    // transactions in the queue, it waits until we get below
                    // 10000. This is where we signal that we have gotten there
                    // Once we have spilling of transactions working, this
                    // logic will need to be redone
                    if (_deque.size() == 10000) {
                        _queueCond.notify_all();
                    }
                }
            }
            catch (DBException& e) {
                sethbmsg(str::stream() << "db exception in producer on applier thread: " << e.toString());
                sleepsecs(2);
            }
            catch (std::exception& e2) {
                sethbmsg(str::stream() << "exception in producer on applier thread: " << e2.what());
                sleepsecs(2);
            }
        }
    }