/**
 * Creates the collection 'nss' with 'options', pushes a single insert oplog entry for that
 * namespace through multiApply() and reports the "isForCappedCollection" flag found on the
 * entry that was handed to the writer-thread callback.
 *
 * The freshly constructed entry is asserted to have the flag cleared, so a 'true' result means
 * the flag was set somewhere along the multiApply() path.
 */
bool _testOplogEntryIsForCappedCollection(OperationContext* txn,
                                          const NamespaceString& nss,
                                          const CollectionOptions& options) {
    auto writerPool = SyncTail::makeWriterPool();

    // Records a copy of every operation the writer threads are asked to apply.
    MultiApplier::Operations operationsApplied;
    auto applyOperationFn = [&operationsApplied](MultiApplier::OperationPtrs* batch) {
        for (const auto& entryPtr : *batch) {
            operationsApplied.push_back(*entryPtr);
        }
    };

    createCollection(txn, nss, options);

    auto op = makeInsertDocumentOplogEntry({Timestamp(Seconds(1), 0), 1LL}, nss, BSON("a" << 1));
    ASSERT_FALSE(op.isForCappedCollection);

    auto lastOpTime =
        unittest::assertGet(multiApply(txn, writerPool.get(), {op}, applyOperationFn));
    ASSERT_EQUALS(op.getOpTime(), lastOpTime);

    // Exactly one operation went in, so exactly one must have come out.
    ASSERT_EQUALS(1U, operationsApplied.size());
    const auto& opApplied = operationsApplied.front();
    ASSERT_EQUALS(op, opApplied);

    // "isForCappedCollection" is not parsed from raw oplog entry document.
    return opApplied.isForCappedCollection;
}
/**
 * Applies two insert operations on different namespaces through multiApply() using a
 * three-thread writer pool, then checks that each operation was handed to the apply callback in
 * its own batch and that both raw entries were passed, in order, to the storage interface.
 */
TEST_F(SyncTailTest, MultiApplyAssignsOperationsToWriterThreadsBasedOnNamespaceHash) {
    // This test relies on implementation details of how multiApply uses hashing to distribute ops
    // to threads. It is possible for this test to fail, even if the implementation of multiApply is
    // correct. If it fails, consider adjusting the namespace names (to adjust the hash values) or
    // the number of threads in the pool.
    NamespaceString nss1("test.t0");
    NamespaceString nss2("test.t1");
    OldThreadPool writerPool(3);

    // 'mutex' guards every container that is written from the callbacks below.
    stdx::mutex mutex;

    // One inner vector per invocation of the apply callback.
    std::vector<MultiApplier::Operations> operationsApplied;
    auto applyOperationFn = [&mutex, &operationsApplied](MultiApplier::OperationPtrs* batch) {
        stdx::lock_guard<stdx::mutex> guard(mutex);
        operationsApplied.emplace_back();
        auto& appliedByThisCall = operationsApplied.back();
        for (const auto& entryPtr : *batch) {
            appliedByThisCall.push_back(*entryPtr);
        }
    };

    auto op1 = makeInsertDocumentOplogEntry({Timestamp(Seconds(1), 0), 1LL}, nss1, BSON("x" << 1));
    auto op2 = makeInsertDocumentOplogEntry({Timestamp(Seconds(2), 0), 1LL}, nss2, BSON("x" << 2));

    // Capture what gets handed to the storage interface's insertDocuments hook.
    NamespaceString nssForInsert;
    std::vector<BSONObj> operationsWrittenToOplog;
    _storageInterface->insertDocumentsFn = [&mutex, &nssForInsert, &operationsWrittenToOplog](
        OperationContext* txn,
        const NamespaceString& targetNss,
        const std::vector<BSONObj>& documents) {
        stdx::lock_guard<stdx::mutex> guard(mutex);
        nssForInsert = targetNss;
        operationsWrittenToOplog = documents;
        return Status::OK();
    };

    auto lastOpTime =
        unittest::assertGet(multiApply(_txn.get(), &writerPool, {op1, op2}, applyOperationFn));
    ASSERT_EQUALS(op2.getOpTime(), lastOpTime);

    // Each writer thread should be given exactly one operation to apply.
    std::vector<OpTime> seen;
    {
        stdx::lock_guard<stdx::mutex> lock(mutex);
        ASSERT_EQUALS(operationsApplied.size(), 2U);
        for (const auto& operationsAppliedByThread : operationsApplied) {
            ASSERT_EQUALS(1U, operationsAppliedByThread.size());
            const auto& oplogEntry = operationsAppliedByThread.front();
            // No optime may show up in more than one batch.
            ASSERT_TRUE(std::find(seen.cbegin(), seen.cend(), oplogEntry.getOpTime()) ==
                        seen.cend());
            ASSERT_TRUE(oplogEntry == op1 || oplogEntry == op2);
            seen.push_back(oplogEntry.getOpTime());
        }
    }

    // Check ops in oplog.
    stdx::lock_guard<stdx::mutex> lock(mutex);
    ASSERT_EQUALS(2U, operationsWrittenToOplog.size());
    ASSERT_EQUALS(NamespaceString(rsOplogName), nssForInsert);
    ASSERT_EQUALS(op1.raw, operationsWrittenToOplog[0]);
    ASSERT_EQUALS(op2.raw, operationsWrittenToOplog[1]);
}
/**
 * Applies two insert operations on different namespaces through multiApply() using a two-thread
 * writer pool. The namespaces are first asserted to hash to different slots of the pool; the
 * test then checks that the apply callback ran once per thread with one operation each, and
 * that both raw entries were passed, in order, to the storage interface.
 */
TEST_F(SyncTailTest, MultiApplyAssignsOperationsToWriterThreadsBasedOnNamespaceHash) {
    NamespaceString nss1("test.t0");
    NamespaceString nss2("test.t1");
    OldThreadPool writerPool(2);

    // Ensure that namespaces are hashed to different threads in pool.
    ASSERT_EQUALS(0U, StringMapTraits::hash(nss1.ns()) % writerPool.getNumThreads());
    ASSERT_EQUALS(1U, StringMapTraits::hash(nss2.ns()) % writerPool.getNumThreads());

    // 'mutex' guards every container that is written from the callbacks below.
    stdx::mutex mutex;

    // One inner vector per invocation of the apply callback.
    std::vector<MultiApplier::Operations> operationsApplied;
    auto applyOperationFn = [&mutex, &operationsApplied](MultiApplier::OperationPtrs* batch) {
        stdx::lock_guard<stdx::mutex> guard(mutex);
        operationsApplied.emplace_back();
        auto& appliedByThisCall = operationsApplied.back();
        for (const auto& entryPtr : *batch) {
            appliedByThisCall.push_back(*entryPtr);
        }
    };

    auto op1 = makeInsertDocumentOplogEntry({Timestamp(Seconds(1), 0), 1LL}, nss1, BSON("x" << 1));
    auto op2 = makeInsertDocumentOplogEntry({Timestamp(Seconds(2), 0), 1LL}, nss2, BSON("x" << 2));

    // Capture what gets handed to the storage interface's insertDocuments hook.
    NamespaceString nssForInsert;
    std::vector<BSONObj> operationsWrittenToOplog;
    _storageInterface->insertDocumentsFn = [&mutex, &nssForInsert, &operationsWrittenToOplog](
        OperationContext* txn,
        const NamespaceString& targetNss,
        const std::vector<BSONObj>& documents) {
        stdx::lock_guard<stdx::mutex> guard(mutex);
        nssForInsert = targetNss;
        operationsWrittenToOplog = documents;
        return Status::OK();
    };

    auto lastOpTime =
        unittest::assertGet(multiApply(_txn.get(), &writerPool, {op1, op2}, applyOperationFn));
    ASSERT_EQUALS(op2.getOpTime(), lastOpTime);

    // Each writer thread should be given exactly one operation to apply.
    std::vector<OpTime> seen;
    {
        stdx::lock_guard<stdx::mutex> lock(mutex);
        ASSERT_EQUALS(writerPool.getNumThreads(), operationsApplied.size());
        for (const auto& operationsAppliedByThread : operationsApplied) {
            ASSERT_EQUALS(1U, operationsAppliedByThread.size());
            const auto& oplogEntry = operationsAppliedByThread.front();
            // No optime may show up in more than one batch.
            ASSERT_TRUE(std::find(seen.cbegin(), seen.cend(), oplogEntry.getOpTime()) ==
                        seen.cend());
            ASSERT_TRUE(oplogEntry == op1 || oplogEntry == op2);
            seen.push_back(oplogEntry.getOpTime());
        }
    }

    // Check ops in oplog.
    stdx::lock_guard<stdx::mutex> lock(mutex);
    ASSERT_EQUALS(2U, operationsWrittenToOplog.size());
    ASSERT_EQUALS(NamespaceString(rsOplogName), nssForInsert);
    ASSERT_EQUALS(op1.raw, operationsWrittenToOplog[0]);
    ASSERT_EQUALS(op2.raw, operationsWrittenToOplog[1]);
}
/**
 * Feeds multiSyncApply_noAbort() two "create collection" operations followed by interleaved
 * inserts on two namespaces, and checks that the apply callback sees the two creates followed
 * by one grouped insert per namespace: each grouped entry carries the optime and namespace of
 * that namespace's first insert and an array "o" field holding both inserted documents.
 */
TEST_F(SyncTailTest, MultiSyncApplyGroupsInsertOperationByNamespaceBeforeApplying) {
    int seconds = 0;

    // Builds an insert oplog entry for 'nss' whose timestamp (in seconds) and "_id" both come
    // from the shared counter, then advances the counter so every entry is distinct.
    auto makeOp = [&seconds](const NamespaceString& nss) {
        // Read-and-advance the counter exactly once, in its own statement. Reading 'seconds' in
        // one call argument while another argument of the same call does 'seconds++' leaves the
        // relative order up to the compiler, so the generated timestamp would not be portable.
        const int current = seconds++;
        return makeInsertDocumentOplogEntry(
            {Timestamp(Seconds(current), 0), 1LL}, nss, BSON("_id" << current));
    };

    NamespaceString nss1("test." + _agent.getSuiteName() + "_" + _agent.getTestName() + "_1");
    NamespaceString nss2("test." + _agent.getSuiteName() + "_" + _agent.getTestName() + "_2");

    auto createOp1 = makeCreateCollectionOplogEntry({Timestamp(Seconds(seconds++), 0), 1LL}, nss1);
    auto createOp2 = makeCreateCollectionOplogEntry({Timestamp(Seconds(seconds++), 0), 1LL}, nss2);
    auto insertOp1a = makeOp(nss1);
    auto insertOp1b = makeOp(nss1);
    auto insertOp2a = makeOp(nss2);
    auto insertOp2b = makeOp(nss2);

    // Records every operation document handed to the apply callback, re-parsed as an OplogEntry.
    MultiApplier::Operations operationsApplied;
    auto syncApply = [&operationsApplied](OperationContext*, const BSONObj& op, bool) {
        operationsApplied.push_back(OplogEntry(op));
        return Status::OK();
    };

    // Inserts are deliberately interleaved across the two namespaces.
    MultiApplier::OperationPtrs ops = {
        &createOp1, &createOp2, &insertOp1a, &insertOp2a, &insertOp1b, &insertOp2b};
    ASSERT_OK(multiSyncApply_noAbort(_txn.get(), &ops, syncApply));

    // Six operations in, four out: two creates plus one grouped insert per namespace.
    ASSERT_EQUALS(4U, operationsApplied.size());
    ASSERT_EQUALS(createOp1, operationsApplied[0]);
    ASSERT_EQUALS(createOp2, operationsApplied[1]);

    // Check grouped insert operations in namespace "nss1".
    ASSERT_EQUALS(insertOp1a.getOpTime(), operationsApplied[2].getOpTime());
    ASSERT_EQUALS(insertOp1a.ns, operationsApplied[2].ns);
    ASSERT_EQUALS(BSONType::Array, operationsApplied[2].o.type());
    auto group1 = operationsApplied[2].o.Array();
    ASSERT_EQUALS(2U, group1.size());
    ASSERT_EQUALS(insertOp1a.o.Obj(), group1[0].Obj());
    ASSERT_EQUALS(insertOp1b.o.Obj(), group1[1].Obj());

    // Check grouped insert operations in namespace "nss2".
    ASSERT_EQUALS(insertOp2a.getOpTime(), operationsApplied[3].getOpTime());
    ASSERT_EQUALS(insertOp2a.ns, operationsApplied[3].ns);
    ASSERT_EQUALS(BSONType::Array, operationsApplied[3].o.type());
    auto group2 = operationsApplied[3].o.Array();
    ASSERT_EQUALS(2U, group2.size());
    ASSERT_EQUALS(insertOp2a.o.Obj(), group2[0].Obj());
    ASSERT_EQUALS(insertOp2b.o.Obj(), group2[1].Obj());
}
/**
 * Processes one remote oplog entry ('operation') against the current position in the local
 * oplog iterator.
 *
 * On the first call (nothing scanned yet) the first local entry is fetched; the call fails with
 * OplogStartMissing if there is none, and with ExceededTimeLimit if the local entry's timestamp
 * is more than 1800 seconds ahead of the remote one.
 *
 * Local entries whose timestamp is greater than the remote timestamp are passed to
 * '_rollbackOperation' one by one. When the timestamps are equal, matching hashes yield the
 * common point (the local entry's optime paired with '_localOplogValue.second'); differing
 * hashes cause the local entry to be rolled back and NoSuchKey to be returned. When the local
 * timestamp is smaller, NoSuchKey is returned as well. NoSuchKey therefore tells the caller to
 * supply further remote operations.
 *
 * A failure from '_rollbackOperation' is returned as-is (it must not be NoSuchKey). Running out
 * of local entries while advancing yields NoMatchingDocument.
 */
StatusWith<RollBackLocalOperations::RollbackCommonPoint> RollBackLocalOperations::onRemoteOperation(
    const BSONObj& operation) {
    // Diagnostics shared by both places where the local iterator can run dry.
    const auto logReachedBeginningOfLocalOplog = [this, &operation]() {
        severe() << "rollback error RS101 reached beginning of local oplog";
        log() << " scanned: " << _scanned;
        log() << " theirTime: " << getTimestamp(operation).toStringLong();
        log() << " ourTime: " << getTimestamp(_localOplogValue).toStringLong();
    };

    if (_scanned == 0) {
        auto firstLocalEntry = _localOplogIterator->next();
        if (!firstLocalEntry.isOK()) {
            return StatusWith<RollbackCommonPoint>(ErrorCodes::OplogStartMissing,
                                                   "no oplog during initsync");
        }
        _localOplogValue = firstLocalEntry.getValue();

        // Widen before subtracting: the difference could be positive, negative, or zero.
        const long long localSecs =
            static_cast<long long>(getTimestamp(_localOplogValue).getSecs());
        const long long secondsAhead = localSecs - getTimestamp(operation).getSecs();

        log() << "rollback our last optime: " << getTimestamp(_localOplogValue).toStringPretty();
        log() << "rollback their last optime: " << getTimestamp(operation).toStringPretty();
        log() << "rollback diff in end of log times: " << secondsAhead << " seconds";

        if (secondsAhead > 1800) {
            severe() << "rollback too long a time period for a rollback.";
            return StatusWith<RollbackCommonPoint>(
                ErrorCodes::ExceededTimeLimit,
                "rollback error: not willing to roll back more than 30 minutes of data");
        }
    }

    // Undo every local entry that is strictly newer than the remote one.
    while (getTimestamp(_localOplogValue) > getTimestamp(operation)) {
        _scanned++;

        auto rollbackStatus = _rollbackOperation(_localOplogValue.first);
        if (!rollbackStatus.isOK()) {
            // NoSuchKey is reserved for "need more remote operations".
            invariant(ErrorCodes::NoSuchKey != rollbackStatus.code());
            return rollbackStatus;
        }

        auto nextLocalEntry = _localOplogIterator->next();
        if (!nextLocalEntry.isOK()) {
            logReachedBeginningOfLocalOplog();
            return StatusWith<RollbackCommonPoint>(ErrorCodes::NoMatchingDocument,
                                                   "RS101 reached beginning of local oplog [2]");
        }
        _localOplogValue = nextLocalEntry.getValue();
    }

    if (getTimestamp(_localOplogValue) == getTimestamp(operation)) {
        _scanned++;

        // Same timestamp and same hash: this entry is the common point.
        if (getHash(_localOplogValue) == getHash(operation)) {
            return StatusWith<RollbackCommonPoint>(
                std::make_pair(getOpTime(_localOplogValue), _localOplogValue.second));
        }

        // Same timestamp but different hash: undo the local entry and move on.
        auto rollbackStatus = _rollbackOperation(_localOplogValue.first);
        if (!rollbackStatus.isOK()) {
            invariant(ErrorCodes::NoSuchKey != rollbackStatus.code());
            return rollbackStatus;
        }

        auto nextLocalEntry = _localOplogIterator->next();
        if (!nextLocalEntry.isOK()) {
            logReachedBeginningOfLocalOplog();
            return StatusWith<RollbackCommonPoint>(ErrorCodes::NoMatchingDocument,
                                                   "RS101 reached beginning of local oplog [1]");
        }
        _localOplogValue = nextLocalEntry.getValue();

        return StatusWith<RollbackCommonPoint>(
            ErrorCodes::NoSuchKey,
            "Unable to determine common point - same timestamp but different hash. "
            "Need to process additional remote operations.");
    }

    // Only remaining possibility: the local entry is older than the remote one.
    invariant(getTimestamp(_localOplogValue) < getTimestamp(operation));
    _scanned++;
    return StatusWith<RollbackCommonPoint>(ErrorCodes::NoSuchKey,
                                           "Unable to determine common point. "
                                           "Need to process additional remote operations.");
}
/**
 * Returns true when the op time is null and the level is kLocalReadConcern.
 */
bool ReadConcernArgs::isEmpty() const {
    // A non-null op time alone is enough to make the arguments non-empty.
    if (!getOpTime().isNull()) {
        return false;
    }
    return getLevel() == repl::ReadConcernLevel::kLocalReadConcern;
}