Status applyAbortTransaction(OperationContext* opCtx,
                             const OplogEntry& entry,
                             repl::OplogApplication::Mode mode) {
    // Return error if run via applyOps command.
    uassert(50972,
            "abortTransaction is only used internally by secondaries.",
            mode != repl::OplogApplication::Mode::kApplyOpsCmd);

    // We don't put transactions into the prepare state until the end of recovery and initial sync,
    // so there is no transaction to abort.
    if (mode == repl::OplogApplication::Mode::kRecovering ||
        mode == repl::OplogApplication::Mode::kInitialSync) {
        return Status::OK();
    }

    invariant(mode == repl::OplogApplication::Mode::kSecondary);

    // Transaction operations are in its own batch, so we can modify their opCtx.
    invariant(entry.getSessionId());
    invariant(entry.getTxnNumber());
    opCtx->setLogicalSessionId(*entry.getSessionId());
    opCtx->setTxnNumber(*entry.getTxnNumber());
    // The write on transaction table may be applied concurrently, so refreshing state
    // from disk may read that write, causing starting a new transaction on an existing
    // txnNumber. Thus, we start a new transaction without refreshing state from disk.
    MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx);

    auto transaction = TransactionParticipant::get(opCtx);
    transaction.unstashTransactionResources(opCtx, "abortTransaction");
    transaction.abortActiveTransaction(opCtx);
    return Status::OK();
}
示例#2
0
Status SyncSourceResolver::_compareRequiredOpTimeWithQueryResponse(
    const Fetcher::QueryResponse& queryResponse) {
    if (queryResponse.documents.empty()) {
        return Status(
            ErrorCodes::NoMatchingDocument,
            "remote oplog does not contain entry with optime matching our required optime");
    }
    const OplogEntry oplogEntry(queryResponse.documents.front());
    const auto opTime = oplogEntry.getOpTime();
    if (_requiredOpTime != opTime) {
        return Status(ErrorCodes::BadValue,
                      str::stream() << "remote oplog contain entry with matching timestamp "
                                    << opTime.getTimestamp().toString()
                                    << " but optime "
                                    << opTime.toString()
                                    << " does not "
                                       "match our required optime");
    }
    if (_requiredOpTime.getTerm() != opTime.getTerm()) {
        return Status(ErrorCodes::BadValue,
                      str::stream() << "remote oplog contain entry with term " << opTime.getTerm()
                                    << " that does not "
                                       "match the term in our required optime");
    }
    return Status::OK();
}
示例#3
0
文件: prefetch.cpp 项目: i80and/mongo
// prefetch for an oplog operation
void prefetchPagesForReplicatedOp(OperationContext* opCtx,
                                  Database* db,
                                  const OplogEntry& oplogEntry) {
    invariant(db);
    const ReplSettings::IndexPrefetchConfig prefetchConfig =
        ReplicationCoordinator::get(opCtx)->getIndexPrefetchConfig();

    // Prefetch ignores non-CRUD operations.
    if (!oplogEntry.isCrudOpType()) {
        return;
    }

    // This will have to change for engines other than MMAP V1, because they might not have
    // means for directly prefetching pages from the collection. For this purpose, acquire S
    // lock on the database, instead of optimizing with IS.
    const auto& nss = oplogEntry.getNamespace();
    Lock::CollectionLock collLock(opCtx->lockState(), nss.ns(), MODE_S);

    Collection* collection = db->getCollection(opCtx, nss);
    if (!collection) {
        return;
    }

    auto opType = oplogEntry.getOpType();
    LOG(4) << "index prefetch for op " << OpType_serializer(opType);

    // should we prefetch index pages on updates? if the update is in-place and doesn't change
    // indexed values, it is actually slower - a lot slower if there are a dozen indexes or
    // lots of multikeys.  possible variations (not all mutually exclusive):
    //  1) current behavior: full prefetch
    //  2) don't do it for updates
    //  3) don't do multikey indexes for updates
    //  4) don't prefetchIndexPages on some heuristic; e.g., if it's an $inc.
    //  5) if not prefetching index pages (#2), we should do it if we are upsertings and it
    //     will be an insert. to do that we could do the prefetchRecordPage first and if DNE
    //     then we do #1.
    //
    // note that on deletes 'obj' does not have all the keys we would want to prefetch on.
    // a way to achieve that would be to prefetch the record first, and then afterwards do
    // this part.
    //
    auto obj = oplogEntry.getOperationToApply();
    invariant(!obj.isEmpty());
    prefetchIndexPages(opCtx, collection, prefetchConfig, obj);

    // do not prefetch the data for inserts; it doesn't exist yet
    //
    // we should consider doing the record prefetch for the delete op case as we hit the record
    // when we delete.  note if done we only want to touch the first page.
    //
    // update: do record prefetch.
    if ((opType == OpTypeEnum::kUpdate) &&
        // do not prefetch the data for capped collections because
        // they typically do not have an _id index for findById() to use.
        !collection->isCapped()) {
        prefetchRecordPages(opCtx, db, nss.ns().c_str(), obj);
    }
}
repl::MultiApplier::Operations readTransactionOperationsFromOplogChain(
    OperationContext* opCtx,
    const OplogEntry& commitOrPrepare,
    const std::vector<OplogEntry*> cachedOps) {
    repl::MultiApplier::Operations ops;

    // Get the previous oplog entry.
    auto currentOpTime = commitOrPrepare.getOpTime();

    // The cachedOps are the ops for this transaction that are from the same oplog application batch
    // as the commit or prepare, those which have not necessarily been written to the oplog.  These
    // ops are in order of increasing timestamp.

    // The lastEntryOpTime is the OpTime of the last (latest OpTime) entry for this transaction
    // which is expected to be present in the oplog.  It is the entry before the first cachedOp,
    // unless there are no cachedOps in which case it is the entry before the commit or prepare.
    const auto lastEntryOpTime = (cachedOps.empty() ? commitOrPrepare : *cachedOps.front())
                                     .getPrevWriteOpTimeInTransaction();
    invariant(lastEntryOpTime < currentOpTime);

    TransactionHistoryIterator iter(lastEntryOpTime.get());
    // Empty commits are not allowed, but empty prepares are.
    invariant(commitOrPrepare.getCommandType() != OplogEntry::CommandType::kCommitTransaction ||
              !cachedOps.empty() || iter.hasNext());
    auto commitOrPrepareObj = commitOrPrepare.toBSON();

    // First retrieve and transform the ops from the oplog, which will be retrieved in reverse
    // order.
    while (iter.hasNext()) {
        const auto& operationEntry = iter.next(opCtx);
        invariant(operationEntry.isPartialTransaction());
        auto prevOpsEnd = ops.size();
        _reconstructPartialTxnEntryAtGivenTime(operationEntry, commitOrPrepareObj, &ops);
        // Because BSONArrays do not have fast way of determining size without iterating through
        // them, and we also have no way of knowing how many oplog entries are in a transaction
        // without iterating, reversing each applyOps and then reversing the whole array is
        // about as good as we can do to get the entire thing in chronological order.  Fortunately
        // STL arrays of BSON objects should be fast to reverse (just pointer copies).
        std::reverse(ops.begin() + prevOpsEnd, ops.end());
    }
    std::reverse(ops.begin(), ops.end());

    // Next retrieve and transform the ops from the current batch, which are in increasing timestamp
    // order.
    for (auto* cachedOp : cachedOps) {
        const auto& operationEntry = *cachedOp;
        invariant(operationEntry.isPartialTransaction());
        _reconstructPartialTxnEntryAtGivenTime(operationEntry, commitOrPrepareObj, &ops);
    }
    return ops;
}
bool FieldUpdateSubscription::matchesWithEntry(const OplogEntry& entry) const {
    if (!BasicOperationSubscription::matchesWithEntry(entry)) {
        return false;
    }

    mongo::BSONObj updateDoc = entry.getUpdateDocument();
    for (std::vector<MongoUpdateLogOperation>::const_iterator iter = subscribedOperations.begin(); iter != subscribedOperations.end() ; iter++) {
        std::string operationString = stringForUpdateOperation(*iter);
        if (!operationString.empty()) {
            if (updateDoc.hasField(operationString)) {
                mongo::BSONObj operationObj = updateDoc.getObjectField(operationString.c_str());
                if (operationObj.hasField(fieldName)) {
                    return true;
                }
            }
        } else if (updateDoc.hasField(fieldName)) {
            return true;
        }
    }

    return false;
}
Status applyCommitTransaction(OperationContext* opCtx,
                              const OplogEntry& entry,
                              repl::OplogApplication::Mode mode) {
    // Return error if run via applyOps command.
    uassert(50987,
            "commitTransaction is only used internally by secondaries.",
            mode != repl::OplogApplication::Mode::kApplyOpsCmd);

    IDLParserErrorContext ctx("commitTransaction");
    auto commitOplogEntryOpTime = entry.getOpTime();
    auto commitCommand = CommitTransactionOplogObject::parse(ctx, entry.getObject());
    const bool prepared = !commitCommand.getPrepared() || *commitCommand.getPrepared();
    if (!prepared)
        return Status::OK();
    invariant(commitCommand.getCommitTimestamp());

    if (mode == repl::OplogApplication::Mode::kRecovering ||
        mode == repl::OplogApplication::Mode::kInitialSync) {
        return _applyTransactionFromOplogChain(opCtx,
                                               entry,
                                               mode,
                                               *commitCommand.getCommitTimestamp(),
                                               commitOplogEntryOpTime.getTimestamp());
    }

    invariant(mode == repl::OplogApplication::Mode::kSecondary);

    // Transaction operations are in its own batch, so we can modify their opCtx.
    invariant(entry.getSessionId());
    invariant(entry.getTxnNumber());
    opCtx->setLogicalSessionId(*entry.getSessionId());
    opCtx->setTxnNumber(*entry.getTxnNumber());

    // The write on transaction table may be applied concurrently, so refreshing state
    // from disk may read that write, causing starting a new transaction on an existing
    // txnNumber. Thus, we start a new transaction without refreshing state from disk.
    MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx);

    auto transaction = TransactionParticipant::get(opCtx);
    invariant(transaction);
    transaction.unstashTransactionResources(opCtx, "commitTransaction");
    transaction.commitPreparedTransaction(
        opCtx, *commitCommand.getCommitTimestamp(), commitOplogEntryOpTime);
    return Status::OK();
}
bool BasicOperationSubscription::matchesWithEntry(const OplogEntry & entry) const {
	return entry.getOperation() == operation;
}
示例#8
0
StatusWith<std::set<NamespaceString>> RollbackImpl::_namespacesForOp(const OplogEntry& oplogEntry) {
    NamespaceString opNss = oplogEntry.getNamespace();
    OpTypeEnum opType = oplogEntry.getOpType();
    std::set<NamespaceString> namespaces;

    // No namespaces for a no-op.
    if (opType == OpTypeEnum::kNoop) {
        return std::set<NamespaceString>();
    }

    // CRUD ops have the proper namespace in the operation 'ns' field.
    if (opType == OpTypeEnum::kInsert || opType == OpTypeEnum::kUpdate ||
        opType == OpTypeEnum::kDelete) {
        return std::set<NamespaceString>({opNss});
    }

    // If the operation is a command, then we need to extract the appropriate namespaces from the
    // command object, as opposed to just using the 'ns' field of the oplog entry itself.
    if (opType == OpTypeEnum::kCommand) {
        auto obj = oplogEntry.getObject();
        auto firstElem = obj.firstElement();

        // Does not handle 'applyOps' entries.
        invariant(oplogEntry.getCommandType() != OplogEntry::CommandType::kApplyOps,
                  "_namespacesForOp does not handle 'applyOps' oplog entries.");

        switch (oplogEntry.getCommandType()) {
            case OplogEntry::CommandType::kRenameCollection: {
                // Add both the 'from' and 'to' namespaces.
                namespaces.insert(NamespaceString(firstElem.valuestrsafe()));
                namespaces.insert(NamespaceString(obj.getStringField("to")));
                break;
            }
            case OplogEntry::CommandType::kDropDatabase: {
                // There is no specific namespace to save for a drop database operation.
                break;
            }
            case OplogEntry::CommandType::kDbCheck:
            case OplogEntry::CommandType::kConvertToCapped:
            case OplogEntry::CommandType::kEmptyCapped: {
                // These commands do not need to be supported by rollback. 'convertToCapped' should
                // always be converted to lower level DDL operations, and 'emptycapped' is a
                // testing-only command.
                std::string message = str::stream() << "Encountered unsupported command type '"
                                                    << firstElem.fieldName()
                                                    << "' during rollback.";
                return Status(ErrorCodes::UnrecoverableRollbackError, message);
            }
            case OplogEntry::CommandType::kCreate:
            case OplogEntry::CommandType::kDrop:
            case OplogEntry::CommandType::kCreateIndexes:
            case OplogEntry::CommandType::kDropIndexes:
            case OplogEntry::CommandType::kCollMod: {
                // For all other command types, we should be able to parse the collection name from
                // the first command argument.
                try {
                    auto cmdNss = CommandHelpers::parseNsCollectionRequired(opNss.db(), obj);
                    namespaces.insert(cmdNss);
                } catch (const DBException& ex) {
                    return ex.toStatus();
                }
                break;
            }
            case OplogEntry::CommandType::kApplyOps:
            default:
                // Every possible command type should be handled above.
                MONGO_UNREACHABLE
        }
    }