void addOp(const string& op, BSONObj o, BSONObj* o2 = NULL, const char* coll = NULL, int version = 0) { OpTime ts(getNextGlobalOptime()); BSONObjBuilder b; b.appendTimestamp("ts", ts.asLL()); if (version != 0) { b.append("v", version); } b.append("op", op); b.append("o", o); if (o2) { b.append("o2", *o2); } if (coll) { b.append("ns", coll); } else { b.append("ns", ns()); } _bgsync->addDoc(b.done()); }
// Writes a master/slave-style oplog entry (to "local.oplog.$main" unless
// logNS overrides it) describing the operation (opstr, ns, obj).
//
//   txn         - active operation context; the "local" db is write-locked here.
//   opstr       - oplog op code (e.g. "i", "u", "d", "n").
//   ns          - namespace the operation applies to; ops on "local.*" are
//                 never logged (a write to local.slaves also resets the cache).
//   logNS       - oplog namespace override; NULL selects local.oplog.$main.
//   obj         - operation payload; becomes the "o" field of the entry.
//   o2, bb      - optional "o2" and "b" fields; skipped when NULL.
//   fromMigrate - when true, tags the entry with fromMigrate:true so
//                 consumers can distinguish chunk-migration writes.
static void _logOpOld(OperationContext* txn, const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb, bool fromMigrate ) {
    Lock::DBWrite lk(txn->lockState(), "local");
    WriteUnitOfWork wunit(txn);
    static BufBuilder bufbuilder(8*1024); // todo there is likely a mutex on this constructor

    // Operations on the "local" database are never replicated.
    if ( strncmp(ns, "local.", 6) == 0 ) {
        if ( strncmp(ns, "local.slaves", 12) == 0 ) {
            resetSlaveCache();
        }
        return;
    }

    // Optime assignment is serialized under newOpMutex so entries get
    // monotonically increasing timestamps; waiters are woken after each one.
    mutex::scoped_lock lk2(newOpMutex);
    OpTime ts(getNextGlobalOptime());
    newOptimeNotifier.notify_all();

    /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
       instead we do a single copy to the destination position in the memory mapped file. */
    bufbuilder.reset();
    BSONObjBuilder b(bufbuilder);
    b.appendTimestamp("ts", ts.asDate());
    b.append("op", opstr);
    b.append("ns", ns);
    if (fromMigrate)
        b.appendBool("fromMigrate", true);
    if ( bb )
        b.appendBool("b", *bb);
    if ( o2 )
        b.append("o2", *o2);
    BSONObj partial = b.done(); // partial is everything except the o:... part.

    if( logNS == 0 ) {
        logNS = "local.oplog.$main";
    }

    // Lazily resolve and cache the oplog db/collection on first use.
    if ( localOplogMainCollection == 0 ) {
        Client::Context ctx(txn, logNS);
        localDB = ctx.db();
        verify( localDB );
        localOplogMainCollection = localDB->getCollection(txn, logNS);
        verify( localOplogMainCollection );
    }

    Client::Context ctx(txn, logNS , localDB);
    // OplogDocWriter splices the "o" payload into 'partial' as the document
    // is written, so obj's buffer is copied only once (see comment above).
    OplogDocWriter writer( partial, obj );
    checkOplogInsert( localOplogMainCollection->insertDocument( txn, &writer, false ) );
    ctx.getClient()->setLastOp( ts );
    wunit.commit();
}
// Applies $currentDate: materializes the target path in the document if it
// does not exist yet, then writes either the current wall-clock Date or the
// next global OpTime (as a Timestamp) into the target element.
// Returns a non-OK Status if the element cannot be created, attached, or
// assigned; on success records the changed element for oplog logging.
Status ModifierCurrentDate::apply() const {
    // The destination already exists iff prepare() located an element at the
    // final component of the update path.
    const bool targetInPlace = (_preparedState->elemFound.ok() &&
                                _preparedState->idxFound == (_updatePath.numParts() - 1));

    mutablebson::Document& document = _preparedState->doc;
    StringData leafName = _updatePath.getPart(_updatePath.numParts() - 1);

    // Reuse the existing element when present; otherwise start from the
    // end() sentinel, to be replaced below.
    mutablebson::Element target = targetInPlace ? _preparedState->elemFound : document.end();

    if (!targetInPlace) {
        // Create the leaf element with a placeholder value of the right type.
        target = _typeIsDate ? document.makeElementDate(leafName, Date_t())
                             : document.makeElementTimestamp(leafName, OpTime());
        if (!target.ok()) {
            return Status(ErrorCodes::InternalError, "can't create new element");
        }

        // Two attachment cases: (a) no component of the path exists yet, so
        // build from the document root; (b) a prefix of the path exists, so
        // extend from one past the deepest element found.
        if (!_preparedState->elemFound.ok()) {
            _preparedState->elemFound = document.root();
            _preparedState->idxFound = 0;
        }
        else {
            _preparedState->idxFound++;
        }

        // createPathAt() completes the path and hangs 'target' off its end.
        Status status = pathsupport::createPathAt(
            _updatePath, _preparedState->idxFound, _preparedState->elemFound, target);
        if (!status.isOK())
            return status;
    }

    dassert(target.ok());

    // The element is in place; overwrite its placeholder/old value with "now".
    if (_typeIsDate) {
        const mongo::Date_t now = mongo::jsTime();
        Status status = target.setValueDate(now);
        if (!status.isOK())
            return status;
    }
    else {
        Status status = target.setValueTimestamp(getNextGlobalOptime());
        if (!status.isOK())
            return status;
    }

    // Point elemFound/idxFound at the changed element for oplog logging.
    _preparedState->elemFound = target;
    _preparedState->idxFound = (_updatePath.numParts() - 1);
    return Status::OK();
}
void insertSucceed() { BSONObjBuilder b; OpTime ts(getNextGlobalOptime()); b.appendTimestamp("ts", ts.asLL()); b.append("op", "i"); b.append("o", BSON("_id" << 123 << "x" << 456)); b.append("ns", cappedNs()); verify(apply(b.obj())); }
void updateSucceed() { BSONObjBuilder b; OpTime ts(getNextGlobalOptime()); b.appendTimestamp("ts", ts.asLL()); b.append("op", "u"); b.append("o", BSON("$set" << BSON("x" << 789))); b.append("o2", BSON("x" << 456)); b.append("ns", cappedNs()); verify(apply(b.obj())); }
// Builds an update oplog entry that is expected NOT to apply, verifies the
// failure, and returns the entry so callers can retry/inspect it.
BSONObj updateFail() {
    BSONObjBuilder entry;
    OpTime opTime(getNextGlobalOptime());
    entry.appendTimestamp("ts", opTime.asLL());
    entry.append("op", "u");
    entry.append("o", BSON("$set" << BSON("x" << 456)));
    entry.append("o2", BSON("_id" << 123 << "x" << 123));
    entry.append("ns", _cappedNs);

    BSONObj op = entry.obj();
    verify(!apply(op));
    return op;
}
void run() { OpTime o(getNextGlobalOptime()); BSONObjBuilder b; b.append("ns","dummy"); b.appendTimestamp("ts", o.asLL()); BSONObj obj = b.obj(); MockInitialSync mock; // all three should succeed std::vector<BSONObj> ops; ops.push_back(obj); repl::multiInitialSyncApply(ops, &mock); mock.failOnStep = MockInitialSync::FAIL_FIRST_APPLY; repl::multiInitialSyncApply(ops, &mock); mock.retry = false; repl::multiInitialSyncApply(ops, &mock); drop(); }
void run() { OpTime o(getNextGlobalOptime()); BSONObjBuilder b; b.appendTimestamp("ts", o.asLL()); b.append("op", "u"); b.append("o", BSON("$set" << BSON("x" << 456))); b.append("o2", BSON("_id" << 123)); b.append("ns", ns()); BSONObj obj = b.obj(); SyncTest2 sync2; std::vector<BSONObj> ops; ops.push_back(obj); sync2.insertOnRetry = true; // succeeds multiInitialSyncApply(ops, &sync2); BSONObj fin = findOne(); verify(fin["x"].Number() == 456); drop(); }
// Writes a replica-set oplog entry to local.oplog.rs for the operation
// (opstr, ns, obj) and updates theReplSet's last-optime/last-hash state.
//
//   txn         - active operation context; the "local" db is write-locked here.
//   opstr       - oplog op code; ns/obj/o2/bb become the ns/o/o2/b fields.
//   logNS       - must be 0 for replica sets (DEV-asserted below).
//   fromMigrate - when true, tags the entry with fromMigrate:true.
static void _logOpRS(OperationContext* txn, const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb, bool fromMigrate ) {
    Lock::DBWrite lk1(txn->lockState(), "local");
    WriteUnitOfWork wunit(txn);

    // Operations on the "local" database are never replicated.
    if ( strncmp(ns, "local.", 6) == 0 ) {
        if ( strncmp(ns, "local.slaves", 12) == 0 )
            resetSlaveCache();
        return;
    }

    // Optime assignment is serialized under newOpMutex so entries get
    // monotonically increasing timestamps; waiters are woken after each one.
    mutex::scoped_lock lk2(newOpMutex);
    OpTime ts(getNextGlobalOptime());
    newOptimeNotifier.notify_all();

    long long hashNew;
    if( theReplSet ) {
        // Only a primary may produce oplog entries; anything else is fatal.
        if (!theReplSet->box.getState().primary()) {
            log() << "replSet error : logOp() but not primary";
            fassertFailed(17405);
        }
        // Chain the new hash from the previous one, the timestamp, and this
        // node's id, so each entry's "h" depends on the whole history.
        hashNew = (theReplSet->lastH * 131 + ts.asLL()) * 17 + theReplSet->selfId();
    }
    else {
        // must be initiation (only an empty-ns no-op is legal before the
        // replica set object exists)
        verify( *ns == 0 );
        hashNew = 0;
    }

    /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
       instead we do a single copy to the destination position in the memory mapped file. */
    logopbufbuilder.reset();
    BSONObjBuilder b(logopbufbuilder);
    b.appendTimestamp("ts", ts.asDate());
    b.append("h", hashNew);
    b.append("v", OPLOG_VERSION);
    b.append("op", opstr);
    b.append("ns", ns);
    if (fromMigrate)
        b.appendBool("fromMigrate", true);
    if ( bb )
        b.appendBool("b", *bb);
    if ( o2 )
        b.append("o2", *o2);
    BSONObj partial = b.done(); // everything except the "o" payload

    DEV verify( logNS == 0 ); // check this was never a master/slave master

    // Lazily resolve and cache the oplog db/collection on first use.
    if ( localOplogRSCollection == 0 ) {
        Client::Context ctx(txn, rsoplog);
        localDB = ctx.db();
        verify( localDB );
        localOplogRSCollection = localDB->getCollection( txn, rsoplog );
        massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", localOplogRSCollection);
    }

    Client::Context ctx(txn, rsoplog, localDB);
    // OplogDocWriter splices the "o" payload into 'partial' as the document
    // is written, so obj's buffer is copied only once.
    OplogDocWriter writer( partial, obj );
    checkOplogInsert( localOplogRSCollection->insertDocument( txn, &writer, false ) );

    /* todo: now() has code to handle clock skew. but if the skew server to server is large it will
             get unhappy. this code (or code in now() maybe) should be improved. */
    if( theReplSet ) {
        // If the new timestamp is not strictly greater than the last one we
        // wrote, the oplog stream went backwards; try resyncing from the
        // primary rather than silently continuing.
        if( !(theReplSet->lastOpTimeWritten<ts) ) {
            log() << "replication oplog stream went back in time. previous timestamp: "
                  << theReplSet->lastOpTimeWritten << " newest timestamp: " << ts
                  << ". attempting to sync directly from primary." << endl;
            BSONObjBuilder result;
            Status status = theReplSet->forceSyncFrom(theReplSet->box.getPrimary()->fullName(),
                                                      &result);
            if (!status.isOK()) {
                log() << "Can't sync from primary: " << status;
            }
        }
        theReplSet->lastOpTimeWritten = ts;
        theReplSet->lastH = hashNew;
        ctx.getClient()->setLastOp( ts );
    }
    wunit.commit();
}
// Attempts to elect this node primary: checks eligibility (not stepped down,
// has data, freshest among reachable members), votes for itself, broadcasts
// replSetElect to the other members, tallies votes, and on a majority assumes
// the primary role. On a tie it may sleep briefly and throw
// RetryAfterSleepException so the caller retries. On any failure path the
// self-vote is released via _electionFailed().
void Consensus::_electSelf() {
    // Refuse to run for election while a stepdown period is still in effect.
    if( time(0) < steppedDown )
        return;

    {
        const OpTime ord = theReplSet->lastOpTimeWritten;
        if( ord == 0 ) {
            log() << "replSet info not trying to elect self, do not yet have a complete set of data from any point in time" << rsLog;
            return;
        }
    }

    bool allUp;
    int nTies;
    // Bail out if some other member has fresher data than we do.
    if( !_weAreFreshest(allUp, nTies) ) {
        return;
    }

    rs.sethbmsg("",9);

    if (!allUp && time(0) - serverGlobalParams.started < 60 * 5) {
        /* the idea here is that if a bunch of nodes bounce all at once, we don't want to drop data
           if we don't have to -- we'd rather be offline and wait a little longer instead
           todo: make this configurable.
           */
        rs.sethbmsg("not electing self, not all members up and we have been up less than 5 minutes");
        return;
    }

    Member& me = *rs._self;

    if( nTies ) {
        /* tie?  we then randomly sleep to try to not collide on our voting. */
        /* todo: smarter. */
        if( me.id() == 0 || _sleptLast ) {
            // would be fine for one node not to sleep
            // todo: biggest / highest priority nodes should be the ones that get to not sleep
        }
        else {
            verify( !rs.lockedByMe() ); // bad to go to sleep locked
            unsigned ms = ((unsigned) rand()) % 1000 + 50;
            DEV log() << "replSet tie " << nTies << " sleeping a little " << ms << "ms" << rsLog;
            _sleptLast = true;
            sleepmillis(ms);
            // Abort this attempt; the caller is expected to retry after the
            // randomized sleep has (hopefully) broken the tie.
            throw RetryAfterSleepException();
        }
    }
    _sleptLast = false;

    time_t start = time(0);
    unsigned meid = me.id();
    int tally = _yea( meid );   // vote for ourselves first
    bool success = false;
    try {
        log() << "replSet info electSelf " << meid << rsLog;

        BSONObj electCmd = BSON(
               "replSetElect" << 1 <<
               "set" << rs.name() <<
               "who" << me.fullName() <<
               "whoid" << me.hbinfo().id() <<
               "cfgver" << rs._cfg->version <<
               "round" << OID::gen() /* this is just for diagnostics */
           );

        int configVersion;
        list<Target> L;
        rs.getTargets(L, configVersion);
        _multiCommand(electCmd, L);

        {
            // Tally the votes returned by each member we reached.
            for( list<Target>::iterator i = L.begin(); i != L.end(); i++ ) {
                LOG(1) << "replSet elect res: " << i->result.toString() << rsLog;
                if( i->ok ) {
                    int v = i->result["vote"].Int();
                    tally += v;
                }
            }
            if( tally*2 <= _totalVotes() ) {
                // strict majority required
                log() << "replSet couldn't elect self, only received " << tally << " votes" << rsLog;
            }
            else if( time(0) - start > 30 ) {
                // defensive; should never happen as we have timeouts on connection and operation for our conn
                log() << "replSet too much time passed during our election, ignoring result" << rsLog;
            }
            else if( configVersion != rs.config().version ) {
                log() << "replSet config version changed during our election, ignoring result" << rsLog;
            }
            else {
                /* succeeded. */
                LOG(1) << "replSet election succeeded, assuming primary role" << rsLog;
                success = true;
                setElectionTime(getNextGlobalOptime());
                rs.assumePrimary();
            }
        }
    }
    catch( std::exception& ) {
        // Release our self-vote before propagating the error.
        if( !success ) _electionFailed(meid);
        throw;
    }
    if( !success ) _electionFailed(meid);
}
// Writes a replica-set oplog entry to local.oplog.rs for the operation
// (opstr, ns, obj), using the ReplicationCoordinator for write-permission
// checks and last-optime bookkeeping, and BackgroundSync for the hash chain.
//
//   txn         - active operation context; the "local" db is X-locked here.
//   opstr       - oplog op code; 'n' (no-op) entries are always allowed.
//   logNS       - must be 0 for replica sets (DEV-asserted below).
//   fromMigrate - when true, tags the entry with fromMigrate:true.
static void _logOpRS(OperationContext* txn, const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb, bool fromMigrate ) {
    Lock::DBLock lk1(txn->lockState(), "local", newlm::MODE_X);
    WriteUnitOfWork wunit(txn);

    // Operations on the "local" database are never replicated.
    if ( strncmp(ns, "local.", 6) == 0 ) {
        if ( strncmp(ns, "local.slaves", 12) == 0 )
            resetSlaveCache();
        return;
    }

    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

    // Optime assignment is serialized under newOpMutex so entries get
    // monotonically increasing timestamps; waiters are woken after each one.
    mutex::scoped_lock lk2(newOpMutex);
    OpTime ts(getNextGlobalOptime());
    newOptimeNotifier.notify_all();

    long long hashNew = BackgroundSync::get()->getLastAppliedHash();

    // Check to make sure logOp() is legal at this point.
    if (*opstr == 'n') {
        // 'n' operations are always logged
        invariant(*ns == '\0');

        // 'n' operations do not advance the hash, since they are not rolled back
    }
    else {
        if (!replCoord->canAcceptWritesForDatabase(nsToDatabaseSubstring(ns))) {
            severe() << "replSet error : logOp() but can't accept write to collection " << ns;
            fassertFailed(17405);
        }

        // Advance the hash: chain it from the previous hash, the timestamp,
        // and this node's id, so "h" depends on the whole history.
        hashNew = (hashNew * 131 + ts.asLL()) * 17 + replCoord->getMyId();
    }

    /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
       instead we do a single copy to the destination position in the memory mapped file. */
    logopbufbuilder.reset();
    BSONObjBuilder b(logopbufbuilder);
    b.appendTimestamp("ts", ts.asDate());
    b.append("h", hashNew);
    b.append("v", OPLOG_VERSION);
    b.append("op", opstr);
    b.append("ns", ns);
    if (fromMigrate)
        b.appendBool("fromMigrate", true);
    if ( bb )
        b.appendBool("b", *bb);
    if ( o2 )
        b.append("o2", *o2);
    BSONObj partial = b.done(); // everything except the "o" payload

    DEV verify( logNS == 0 ); // check this was never a master/slave master

    // Lazily resolve and cache the oplog db/collection on first use.
    if ( localOplogRSCollection == 0 ) {
        Client::Context ctx(txn, rsoplog);
        localDB = ctx.db();
        verify( localDB );
        localOplogRSCollection = localDB->getCollection( txn, rsoplog );
        massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", localOplogRSCollection);
    }

    Client::Context ctx(txn, rsoplog, localDB);
    // OplogDocWriter splices the "o" payload into 'partial' as the document
    // is written, so obj's buffer is copied only once.
    OplogDocWriter writer( partial, obj );
    checkOplogInsert( localOplogRSCollection->insertDocument( txn, &writer, false ) );

    // Publish the new hash and optime so sync/replication state stays
    // consistent with what was just written.
    BackgroundSync::get()->setLastAppliedHash(hashNew);
    ctx.getClient()->setLastOp( ts );
    replCoord->setMyLastOptime(txn, ts);

    wunit.commit();
}