TEST_F(SyncTailTest, MultiSyncApplySortsOperationsStablyByNamespaceBeforeApplying) {
    // Each generated op carries a unique, increasing "x" value so that a stable
    // sort can be distinguished from an unstable one within a namespace.
    int counter = 0;
    auto makeOpOnNamespace = [&counter](const char* ns) -> OplogEntry {
        return OplogEntry(BSON("op"
                               << "x"
                               << "ns"
                               << ns
                               << "x"
                               << counter++));
    };
    auto op1 = makeOpOnNamespace("test.t1");
    auto op2 = makeOpOnNamespace("test.t1");
    auto op3 = makeOpOnNamespace("test.t2");
    auto op4 = makeOpOnNamespace("test.t3");

    // Record every op in the order multiSyncApply hands it to the apply function.
    MultiApplier::Operations operationsApplied;
    auto syncApply = [&operationsApplied](OperationContext*, const BSONObj& op, bool) {
        operationsApplied.push_back(OplogEntry(op));
        return Status::OK();
    };

    // Present the ops out of order; they should come back sorted by namespace,
    // with the relative order of ops on the same namespace preserved.
    MultiApplier::OperationPtrs ops = {&op4, &op1, &op3, &op2};
    ASSERT_OK(multiSyncApply_noAbort(_txn.get(), &ops, syncApply));

    ASSERT_EQUALS(4U, operationsApplied.size());
    ASSERT_EQUALS(op1, operationsApplied[0]);
    ASSERT_EQUALS(op2, operationsApplied[1]);
    ASSERT_EQUALS(op3, operationsApplied[2]);
    ASSERT_EQUALS(op4, operationsApplied[3]);
}
/**
 * Returns the next batch of oplog entries to apply, popping entries off the
 * oplog buffer only as they are accepted into the batch (peek-then-pop, so a
 * rejected entry stays queued for the next call).
 *
 * Batch construction stops when:
 *  - the buffer is empty,
 *  - a command entry is reached (commands apply in their own batch; the one
 *    exception is a non-prepare applyOps, which is batched like CRUD),
 *  - batchLimits.ops entries have been collected, or
 *  - adding the next entry would reach/exceed batchLimits.bytes — unless the
 *    batch is still empty, so forward progress is always made.
 *
 * Returns ErrorCodes::InvalidOptions if batchLimits.ops is zero, and
 * ErrorCodes::BadValue if an entry's oplog version is not kOplogVersion.
 */
StatusWith<OplogApplier::Operations> OplogApplier::getNextApplierBatch(
    OperationContext* opCtx, const BatchLimits& batchLimits) {
    if (batchLimits.ops == 0) {
        return Status(ErrorCodes::InvalidOptions, "Batch size must be greater than 0.");
    }

    std::uint32_t totalBytes = 0;
    Operations ops;
    BSONObj op;
    // Peek (without removing) so that an entry that does not fit in this batch
    // remains at the front of the buffer for the next call.
    while (_oplogBuffer->peek(opCtx, &op)) {
        auto entry = OplogEntry(op);

        // Check for oplog version change. If it is absent, its value is one.
        if (entry.getVersion() != OplogEntry::kOplogVersion) {
            std::string message = str::stream()
                << "expected oplog version " << OplogEntry::kOplogVersion << " but found version "
                << entry.getVersion() << " in oplog entry: " << redact(entry.toBSON());
            severe() << message;
            return {ErrorCodes::BadValue, message};
        }

        // Commands must be processed one at a time. The only exception to this is applyOps because
        // applyOps oplog entries are effectively containers for CRUD operations. Therefore, it is
        // safe to batch applyOps commands with CRUD operations when reading from the oplog buffer.
        if (entry.isCommand() &&
            (entry.getCommandType() != OplogEntry::CommandType::kApplyOps ||
             entry.shouldPrepare())) {
            if (ops.empty()) {
                // Apply commands one-at-a-time.
                ops.push_back(std::move(entry));
                BSONObj opToPopAndDiscard;
                invariant(_oplogBuffer->tryPop(opCtx, &opToPopAndDiscard));
                // The popped entry must match what we just peeked.
                dassert(ops.back() == OplogEntry(opToPopAndDiscard));
            }

            // Otherwise, apply what we have so far and come back for the command.
            return std::move(ops);
        }

        // Apply replication batch limits.
        if (ops.size() >= batchLimits.ops) {
            return std::move(ops);
        }

        // Never return an empty batch if there are operations left.
        if ((totalBytes + entry.getRawObjSizeBytes() >= batchLimits.bytes) && (ops.size() > 0)) {
            return std::move(ops);
        }

        // Add op to buffer.
        totalBytes += entry.getRawObjSizeBytes();
        ops.push_back(std::move(entry));
        BSONObj opToPopAndDiscard;
        invariant(_oplogBuffer->tryPop(opCtx, &opToPopAndDiscard));
        // The popped entry must match what we just peeked.
        dassert(ops.back() == OplogEntry(opToPopAndDiscard));
    }
    return std::move(ops);
}
// Static StatusWith<OplogEntry> OplogEntry::parse(const BSONObj& object) { try { return OplogEntry(object); } catch (...) { return exceptionToStatus(); } MONGO_UNREACHABLE; }
// Builds a no-op ("n") oplog entry on the fixed namespace "test.t" at the
// given optime/hash pair and returns its BSON representation.
BSONObj AbstractOplogFetcherTest::makeNoopOplogEntry(OpTimeWithHash opTimeWithHash) {
    const OplogEntry noop(opTimeWithHash.opTime,
                          opTimeWithHash.value,
                          OpTypeEnum::kNoop,
                          NamespaceString("test.t"),
                          BSONObj());
    return noop.toBSON();
}
// Copies ops out of the bgsync queue into the deque passed in as a parameter.
// Returns true if the batch should be ended early.
// Batch should end early if we encounter a command, or if
// there are no further ops in the bgsync queue to read.
// This function also blocks 1 second waiting for new ops to appear in the bgsync
// queue. We can't block forever because there are maintenance things we need
// to periodically check in the loop.
bool SyncTail::tryPopAndWaitForMore(OperationContext* txn, SyncTail::OpQueue* ops) {
    BSONObj op;
    // Check to see if there are ops waiting in the bgsync queue
    bool peek_success = peek(&op);

    if (!peek_success) {
        // if we don't have anything in the queue, wait a bit for something to appear
        if (ops->empty()) {
            // block up to 1 second
            _networkQueue->waitForMore();
            // Batch is still empty; caller should loop back and try again.
            return false;
        }

        // otherwise, apply what we have
        return true;
    }

    auto entry = OplogEntry(op);

    // Check for ops that must be processed one at a time.
    if (entry.raw.isEmpty() ||       // sentinel that network queue is drained.
        (entry.opType[0] == 'c') ||  // commands.
        // Index builds are achieved through the use of an insert op, not a command op.
        // The following line is the same as what the insert code uses to detect an index build.
        (!entry.ns.empty() && nsToCollectionSubstring(entry.ns) == "system.indexes")) {
        if (ops->empty()) {
            // apply commands one-at-a-time
            // Only now remove the op from the bgsync queue; a peeked-but-unconsumed
            // op stays queued for the next call.
            ops->push_back(std::move(entry));
            _networkQueue->consume();
        }

        // otherwise, apply what we have so far and come back for the command
        return true;
    }

    // check for oplog version change
    int curVersion = 0;
    if (entry.version.eoo())
        // missing version means version 1
        curVersion = 1;
    else
        curVersion = entry.version.Int();

    if (curVersion != OPLOG_VERSION) {
        // A version mismatch is unrecoverable: log and abort the process.
        severe() << "expected oplog version " << OPLOG_VERSION << " but found version "
                 << curVersion << " in oplog entry: " << op;
        fassertFailedNoTrace(18820);
    }

    // Copy the op to the deque and remove it from the bgsync queue.
    ops->push_back(std::move(entry));
    _networkQueue->consume();

    // Go back for more ops
    return false;
}
/**
 * Builds a create-collection command oplog entry ("op": "c") at the given
 * optime. The namespace defaults to "test.t" when none is supplied.
 */
OplogEntry makeCreateCollectionOplogEntry(OpTime opTime,
                                          const NamespaceString& nss = NamespaceString("test.t")) {
    BSONObjBuilder builder;
    builder.appendElements(opTime.toBSON());
    builder.append("h", 1LL);  // fixed placeholder hash for test entries
    builder.append("op", "c");
    builder.append("ns", nss.getCommandNS());
    builder.append("o", BSON("create" << nss.coll()));
    return OplogEntry(builder.obj());
}
// Verifies that when applying a grouped insert fails, multiSyncApply retries the
// member inserts one at a time, in their original order, and does not attempt to
// re-group the remaining operations.
TEST_F(SyncTailTest, MultiSyncApplyFallsBackOnApplyingInsertsIndividuallyWhenGroupedInsertFails) {
    // "seconds" doubles as both the optime seconds and the "_id" value, making
    // every generated insert unique and ordered.
    int seconds = 0;
    auto makeOp = [&seconds](const NamespaceString& nss) {
        return makeInsertDocumentOplogEntry(
            {Timestamp(Seconds(seconds), 0), 1LL}, nss, BSON("_id" << seconds++));
    };
    NamespaceString nss("test." + _agent.getSuiteName() + "_" + _agent.getTestName() + "_1");
    auto createOp = makeCreateCollectionOplogEntry({Timestamp(Seconds(seconds++), 0), 1LL}, nss);

    // Generate operations to apply:
    // {create}, {insert_1}, {insert_2}, .. {insert_(limit)}, {insert_(limit+1)}
    std::size_t limit = 64;
    MultiApplier::Operations insertOps;
    for (std::size_t i = 0; i < limit + 1; ++i) {
        insertOps.push_back(makeOp(nss));
    }
    MultiApplier::Operations operationsToApply;
    operationsToApply.push_back(createOp);
    std::copy(insertOps.begin(), insertOps.end(), std::back_inserter(operationsToApply));
    std::size_t numFailedGroupedInserts = 0;
    MultiApplier::Operations operationsApplied;
    // Apply function that rejects grouped inserts (recognizable by an array in
    // the "o" field) and records every individually applied op.
    auto syncApply = [&numFailedGroupedInserts,
                      &operationsApplied](OperationContext*, const BSONObj& op, bool) -> Status {
        // Reject grouped insert operations.
        if (op["o"].type() == BSONType::Array) {
            numFailedGroupedInserts++;
            return {ErrorCodes::OperationFailed, "grouped inserts not supported"};
        }
        operationsApplied.push_back(OplogEntry(op));
        return Status::OK();
    };
    MultiApplier::OperationPtrs ops;
    for (auto&& op : operationsToApply) {
        ops.push_back(&op);
    }
    ASSERT_OK(multiSyncApply_noAbort(_txn.get(), &ops, syncApply));

    // On failing to apply the grouped insert operation, multiSyncApply should apply the operations
    // as given in "operationsToApply":
    // {create}, {insert_1}, {insert_2}, .. {insert_(limit)}, {insert_(limit+1)}
    ASSERT_EQUALS(limit + 2, operationsApplied.size());
    ASSERT_EQUALS(createOp, operationsApplied[0]);

    for (std::size_t i = 0; i < limit + 1; ++i) {
        const auto& insertOp = insertOps[i];
        ASSERT_EQUALS(insertOp, operationsApplied[i + 1]);
    }

    // Ensure that multiSyncApply does not attempt to group remaining operations in first failed
    // grouped insert operation.
    ASSERT_EQUALS(1U, numFailedGroupedInserts);
}
/**
 * Builds an insert oplog entry ("op": "i") for the given optime, namespace,
 * and document.
 */
OplogEntry makeInsertDocumentOplogEntry(OpTime opTime,
                                        const NamespaceString& nss,
                                        const BSONObj& documentToInsert) {
    BSONObjBuilder builder;
    builder.appendElements(opTime.toBSON());
    builder.append("h", 1LL);  // fixed placeholder hash for test entries
    builder.append("op", "i");
    builder.append("ns", nss.ns());
    builder.append("o", documentToInsert);
    return OplogEntry(builder.obj());
}
/**
 * Builds an update oplog entry ("op": "u") for the given optime and namespace,
 * with "o2" holding the query document and "o" the update applied to it.
 */
OplogEntry makeUpdateDocumentOplogEntry(OpTime opTime,
                                        const NamespaceString& nss,
                                        const BSONObj& documentToUpdate,
                                        const BSONObj& updatedDocument) {
    BSONObjBuilder builder;
    builder.appendElements(opTime.toBSON());
    builder.append("h", 1LL);  // fixed placeholder hash for test entries
    builder.append("op", "u");
    builder.append("ns", nss.ns());
    builder.append("o2", documentToUpdate);
    builder.append("o", updatedDocument);
    return OplogEntry(builder.obj());
}
// Verifies that multiSyncApply groups at most "limit" (64) consecutive inserts on
// the same namespace into a single grouped insert, leaving the (limit+1)-th
// insert to be applied on its own.
TEST_F(SyncTailTest, MultiSyncApplyUsesLimitWhenGroupingInsertOperation) {
    // "seconds" doubles as both the optime seconds and the "_id" value, making
    // every generated insert unique and ordered.
    int seconds = 0;
    auto makeOp = [&seconds](const NamespaceString& nss) {
        return makeInsertDocumentOplogEntry(
            {Timestamp(Seconds(seconds), 0), 1LL}, nss, BSON("_id" << seconds++));
    };
    NamespaceString nss("test." + _agent.getSuiteName() + "_" + _agent.getTestName() + "_1");
    auto createOp = makeCreateCollectionOplogEntry({Timestamp(Seconds(seconds++), 0), 1LL}, nss);

    // Generate operations to apply:
    // {create}, {insert_1}, {insert_2}, .. {insert_(limit)}, {insert_(limit+1)}
    std::size_t limit = 64;
    MultiApplier::Operations insertOps;
    for (std::size_t i = 0; i < limit + 1; ++i) {
        insertOps.push_back(makeOp(nss));
    }
    MultiApplier::Operations operationsToApply;
    operationsToApply.push_back(createOp);
    std::copy(insertOps.begin(), insertOps.end(), std::back_inserter(operationsToApply));
    MultiApplier::Operations operationsApplied;
    // Apply function that records each operation (grouped or not) as received.
    auto syncApply = [&operationsApplied](OperationContext*, const BSONObj& op, bool) {
        operationsApplied.push_back(OplogEntry(op));
        return Status::OK();
    };
    MultiApplier::OperationPtrs ops;
    for (auto&& op : operationsToApply) {
        ops.push_back(&op);
    }
    ASSERT_OK(multiSyncApply_noAbort(_txn.get(), &ops, syncApply));

    // multiSyncApply should combine operations as follows:
    // {create}, {grouped_insert}, {insert_(limit+1)}
    ASSERT_EQUALS(3U, operationsApplied.size());
    ASSERT_EQUALS(createOp, operationsApplied[0]);

    // The grouped insert takes its optime/namespace from the first grouped op
    // and carries all grouped documents as an array in the "o" field.
    const auto& groupedInsertOp = operationsApplied[1];
    ASSERT_EQUALS(insertOps.front().getOpTime(), groupedInsertOp.getOpTime());
    ASSERT_EQUALS(insertOps.front().ns, groupedInsertOp.ns);
    ASSERT_EQUALS(BSONType::Array, groupedInsertOp.o.type());
    auto groupedInsertDocuments = groupedInsertOp.o.Array();
    ASSERT_EQUALS(limit, groupedInsertDocuments.size());
    for (std::size_t i = 0; i < limit; ++i) {
        const auto& insertOp = insertOps[i];
        ASSERT_EQUALS(insertOp.o.Obj(), groupedInsertDocuments[i].Obj());
    }

    // (limit + 1)-th insert operations should not be included in group of first (limit) inserts.
    ASSERT_EQUALS(insertOps.back(), operationsApplied[2]);
}
/**
 * Splits "operations" at "lastTimestampToApply" and constructs (but does not
 * start) a MultiApplier for the entries up to and including that timestamp.
 *
 * Returns a pair of:
 *  - a MultiApplier over the in-range operations; if an operation with exactly
 *    "lastTimestampToApply" was found, the applier's completion callback is
 *    wrapped so that "pauseDataReplicator" runs before "onCompletion",
 *  - the remaining out-of-range operations.
 *
 * NOTE(review): assumes "operations" is sorted by the "ts" field — required for
 * std::lower_bound to be meaningful; confirm at the call sites.
 * Any thrown exception (including the uasserts for a missing "ts" field) is
 * converted to an error Status.
 */
StatusWith<std::pair<std::unique_ptr<MultiApplier>, MultiApplier::Operations>> applyUntilAndPause(
    executor::TaskExecutor* executor,
    const MultiApplier::Operations& operations,
    const MultiApplier::ApplyOperationFn& applyOperation,
    const MultiApplier::MultiApplyFn& multiApply,
    const Timestamp& lastTimestampToApply,
    const PauseDataReplicatorFn& pauseDataReplicator,
    const MultiApplier::CallbackFn& onCompletion) {
    try {
        // Orders entries by their "ts" timestamp; uasserts if either side lacks one.
        // NOTE(review): the message below has a stray apostrophe ("'ts' field':");
        // left untouched here since changing it alters runtime output.
        auto comp = [](const OplogEntry& left, const OplogEntry& right) {
            uassert(ErrorCodes::FailedToParse,
                    str::stream() << "Operation missing 'ts' field': " << left.raw,
                    left.raw.hasField("ts"));
            uassert(ErrorCodes::FailedToParse,
                    str::stream() << "Operation missing 'ts' field': " << right.raw,
                    right.raw.hasField("ts"));
            return left.raw["ts"].timestamp() < right.raw["ts"].timestamp();
        };
        // Wrap the target timestamp in a minimal OplogEntry so it can be compared
        // against real entries with "comp".
        auto wrapped = OplogEntry(BSON("ts" << lastTimestampToApply));
        auto i = std::lower_bound(operations.cbegin(), operations.cend(), wrapped, comp);
        // "found" iff an entry with exactly lastTimestampToApply exists; include
        // it in the in-range partition when it does.
        bool found = i != operations.cend() && !comp(wrapped, *i);
        auto j = found ? i + 1 : i;
        MultiApplier::Operations operationsInRange(operations.cbegin(), j);
        MultiApplier::Operations operationsNotInRange(j, operations.cend());
        if (!found) {
            // No exact match: complete normally without pausing.
            return std::make_pair(
                std::unique_ptr<MultiApplier>(new MultiApplier(
                    executor, operationsInRange, applyOperation, multiApply, onCompletion)),
                operationsNotInRange);
        }

        // Exact match: interpose pauseBeforeCompletion so the data replicator is
        // paused before the caller's completion callback fires.
        return std::make_pair(
            std::unique_ptr<MultiApplier>(new MultiApplier(executor,
                                                           operationsInRange,
                                                           applyOperation,
                                                           multiApply,
                                                           stdx::bind(pauseBeforeCompletion,
                                                                      stdx::placeholders::_1,
                                                                      stdx::placeholders::_2,
                                                                      pauseDataReplicator,
                                                                      onCompletion))),
            operationsNotInRange);
    } catch (...) {
        return exceptionToStatus();
    }
    MONGO_UNREACHABLE;
    return Status(ErrorCodes::InternalError, "unreachable");
}
TEST_F(SyncTailTest, MultiSyncApplyGroupsInsertOperationByNamespaceBeforeApplying) {
    // "seconds" doubles as both the optime seconds and the "_id" value, making
    // every generated insert unique and ordered.
    int seconds = 0;
    auto makeInsertOn = [&seconds](const NamespaceString& nss) {
        return makeInsertDocumentOplogEntry(
            {Timestamp(Seconds(seconds), 0), 1LL}, nss, BSON("_id" << seconds++));
    };

    // Two distinct collections; the inserts below interleave between them.
    NamespaceString nss1("test." + _agent.getSuiteName() + "_" + _agent.getTestName() + "_1");
    NamespaceString nss2("test." + _agent.getSuiteName() + "_" + _agent.getTestName() + "_2");
    auto createOp1 = makeCreateCollectionOplogEntry({Timestamp(Seconds(seconds++), 0), 1LL}, nss1);
    auto createOp2 = makeCreateCollectionOplogEntry({Timestamp(Seconds(seconds++), 0), 1LL}, nss2);
    auto insertOp1a = makeInsertOn(nss1);
    auto insertOp1b = makeInsertOn(nss1);
    auto insertOp2a = makeInsertOn(nss2);
    auto insertOp2b = makeInsertOn(nss2);

    // Capture every operation in the order it reaches the apply function.
    MultiApplier::Operations appliedOps;
    auto recordApply = [&appliedOps](OperationContext*, const BSONObj& op, bool) {
        appliedOps.push_back(OplogEntry(op));
        return Status::OK();
    };

    MultiApplier::OperationPtrs ops = {
        &createOp1, &createOp2, &insertOp1a, &insertOp2a, &insertOp1b, &insertOp2b};
    ASSERT_OK(multiSyncApply_noAbort(_txn.get(), &ops, recordApply));

    // Expected result: both creates, then one grouped insert per namespace.
    ASSERT_EQUALS(4U, appliedOps.size());
    ASSERT_EQUALS(createOp1, appliedOps[0]);
    ASSERT_EQUALS(createOp2, appliedOps[1]);

    // Grouped insert for "nss1": inherits the optime and namespace of its first
    // insert and carries both documents in an array under "o".
    ASSERT_EQUALS(insertOp1a.getOpTime(), appliedOps[2].getOpTime());
    ASSERT_EQUALS(insertOp1a.ns, appliedOps[2].ns);
    ASSERT_EQUALS(BSONType::Array, appliedOps[2].o.type());
    auto nss1Group = appliedOps[2].o.Array();
    ASSERT_EQUALS(2U, nss1Group.size());
    ASSERT_EQUALS(insertOp1a.o.Obj(), nss1Group[0].Obj());
    ASSERT_EQUALS(insertOp1b.o.Obj(), nss1Group[1].Obj());

    // Grouped insert for "nss2": same structure.
    ASSERT_EQUALS(insertOp2a.getOpTime(), appliedOps[3].getOpTime());
    ASSERT_EQUALS(insertOp2a.ns, appliedOps[3].ns);
    ASSERT_EQUALS(BSONType::Array, appliedOps[3].o.type());
    auto nss2Group = appliedOps[3].o.Array();
    ASSERT_EQUALS(2U, nss2Group.size());
    ASSERT_EQUALS(insertOp2a.o.Obj(), nss2Group[0].Obj());
    ASSERT_EQUALS(insertOp2b.o.Obj(), nss2Group[1].Obj());
}