Example no. 1
 // Takes an entry that was written by _logTransactionOps
 // and applies it to collections
 //
 // TODO: possibly improve performance of this. We create and destroy a
 // context for each operation. Find a way to amortize it out if necessary
 //
 void applyTransactionFromOplog(BSONObj entry) {
     bool transactionAlreadyApplied = entry["a"].Bool();
     if (!transactionAlreadyApplied) {
         Client::Transaction transaction(DB_SERIALIZABLE);
         if (entry.hasElement("ref")) {
             applyRefOp(entry);
         } else if (entry.hasElement("ops")) {
             applyOps(entry["ops"].Array());
         } else {
             verify(0);
         }
         // set the applied bool to true, to let the oplog know that
         // this entry has been applied to collections
         BSONElementManipulator(entry["a"]).setBool(true);
         {
             LOCK_REASON(lockReason, "repl: setting oplog entry's applied bit");
             Lock::DBRead lk1("local", lockReason);
             writeEntryToOplog(entry, false);
         }
         // If this commit fails, it is impossible to recover, because we don't
         // know whether the transaction successfully committed, so we might as
         // well crash. There is currently no known way this code can throw an
         // exception.
         try {
             // we are operating as a secondary. We don't have to fsync
             transaction.commit(DB_TXN_NOSYNC);
         }
         catch (std::exception &e) {
             log() << "exception during commit of applyTransactionFromOplog, aborting system: " << e.what() << endl;
             printStackTrace();
             logflush();
             ::abort();
         }
     }
 }
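
For reference, here is a minimal sketch of the two entry shapes applyTransactionFromOplog distinguishes, inferred from the field accesses above; op1 and oid are hypothetical placeholders, and the real oplog entry format carries additional fields:

 // Hypothetical sketch of the two transaction entry layouts handled above.
 // "a" is the applied bit; small transactions inline their operations under
 // "ops", while large ones store a reference OID into oplog.refs under "ref".
 BSONObj op1 = BSON("op" << "i");          // placeholder operation document
 OID oid = OID::gen();                     // placeholder reference id
 BSONObj smallTxn = BSON("a" << false << "ops" << BSON_ARRAY(op1));
 BSONObj largeTxn = BSON("a" << false << "ref" << oid);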
Example no. 2
 // find all oplog entries for a given OID in the oplog.refs collection and apply them
 // TODO: this should be a range query on oplog.refs where _id.oid == oid, with applyOps
 // run on each entry found. The locking of the query interleaved with the locking in
 // applyOps did not work, so it is a sequence of point queries instead.
 // TODO: verify that the query plan is an indexed lookup.
 // TODO: verify that the query plan does not fetch too many docs and then process only one of them.
 void applyRefOp(BSONObj entry) {
     OID oid = entry["ref"].OID();
     LOG(3) << "apply ref " << entry << " oid " << oid << endl;
     long long seq = 0; // note that 0 is smaller than any of the seq numbers
     while (1) {
         BSONObj entry;
         {
             LOCK_REASON(lockReason, "repl: finding oplog.refs entry to apply");
             Client::ReadContext ctx(rsOplogRefs, lockReason);
             // TODO: Should this be using rsOplogRefsDetails, verifying non-null?
             Collection *cl = getCollection(rsOplogRefs);
             if (cl == NULL || !cl->findOne(BSON("_id" << BSON("$gt" << BSON("oid" << oid << "seq" << seq))), entry, true)) {
                 break;
             }
         }
         BSONElement e = entry.getFieldDotted("_id.seq");
         seq = e.Long();
         BSONElement eOID = entry.getFieldDotted("_id.oid");
         if (oid != eOID.OID()) {
             break;
         }
         LOG(3) << "apply " << entry << " seq=" << seq << endl;
         applyOps(entry["ops"].Array());
     }
 }
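
The loop above pages through oplog.refs with repeated point queries: the composite _id sorts by oid first and seq second, so a $gt query on {oid, lastSeq} yields the next chunk of the same transaction, and a chunk with a different oid terminates the loop. A minimal sketch of the document shape this implies, inferred from the "_id.oid" and "_id.seq" accesses (oid, op1, and op2 are hypothetical placeholders):

 // Hypothetical oplog.refs document layout as read back by applyRefOp.
 // "_id.oid" groups the chunks of one transaction; "_id.seq" orders them.
 BSONObj refsDoc = BSON("_id" << BSON("oid" << oid << "seq" << 1LL)
                              << "ops" << BSON_ARRAY(op1 << op2));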
Example no. 3
Status applyCommitTransaction(OperationContext* opCtx,
                              const repl::OplogEntry& entry,
                              repl::OplogApplication::Mode mode) {
    // Return error if run via applyOps command.
    uassert(50987,
            "commitTransaction is only used internally by secondaries.",
            mode != repl::OplogApplication::Mode::kApplyOpsCmd);

    IDLParserErrorContext ctx("commitTransaction");
    auto commitCommand = CommitTransactionOplogObject::parse(ctx, entry.getObject());

    if (mode == repl::OplogApplication::Mode::kRecovering) {
        const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
        const auto recoveryTimestamp = replCoord->getRecoveryTimestamp();
        invariant(recoveryTimestamp);

        // If the commitTimestamp is before the recoveryTimestamp, then the data already
        // reflects the operations from the transaction.
        const auto& commitTimestamp = commitCommand.getCommitTimestamp();
        if (recoveryTimestamp.get() > commitTimestamp) {
            return Status::OK();
        }

        // Get the corresponding prepareTransaction oplog entry.
        TransactionHistoryIterator iter(entry.getOpTime());
        invariant(iter.hasNext());
        const auto commitOplogEntry = iter.next(opCtx);
        invariant(iter.hasNext());
        const auto prepareOplogEntry = iter.next(opCtx);

        // Transform prepare command into a normal applyOps command.
        const auto prepareCmd = prepareOplogEntry.getOperationToApply().removeField("prepare");

        BSONObjBuilder resultWeDontCareAbout;
        return applyOps(
            opCtx, entry.getNss().db().toString(), prepareCmd, mode, &resultWeDontCareAbout);
    }

    // TODO: SERVER-36492 Only run on secondary until we support initial sync.
    invariant(mode == repl::OplogApplication::Mode::kSecondary);

    // Transaction operations are in their own batch, so we can modify their opCtx.
    invariant(entry.getSessionId());
    invariant(entry.getTxnNumber());
    opCtx->setLogicalSessionId(*entry.getSessionId());
    opCtx->setTxnNumber(*entry.getTxnNumber());
    // The write to the transactions table may be applied concurrently, so refreshing
    // state from disk may read that write and start a new transaction on an existing
    // txnNumber. Thus, we start a new transaction without refreshing state from disk.
    MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx);

    auto transaction = TransactionParticipant::get(opCtx);
    invariant(transaction);
    transaction->unstashTransactionResources(opCtx, "commitTransaction");
    transaction->commitPreparedTransaction(opCtx, commitCommand.getCommitTimestamp());
    return Status::OK();
}
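
For context, a sketch of the payload that entry.getObject() carries for a commitTransaction entry, based on the single field (commitTimestamp) the code above consumes; the exact IDL-defined field set may differ:

// Hypothetical commitTransaction oplog payload; only commitTimestamp is
// read by applyCommitTransaction above.
BSONObj commitObj = BSON("commitTransaction" << 1
                         << "commitTimestamp" << Timestamp(1, 1));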
Example no. 4
    // Doles out all the work to the writer pool threads and waits for them to complete
    void SyncTail::multiApply( std::deque<BSONObj>& ops, MultiSyncApplyFunc applyFunc ) {

        // Use a ThreadPool to prefetch all the operations in a batch.
        prefetchOps(ops);
        
        std::vector< std::vector<BSONObj> > writerVectors(theReplSet->replWriterThreadCount);
        fillWriterVectors(ops, &writerVectors);
        LOG(2) << "replication batch size is " << ops.size() << endl;
        // We must grab this because we're going to grab write locks later.
        // We hold this mutex the entire time we're writing; it doesn't matter
        // because all readers are blocked anyway.
        SimpleMutex::scoped_lock fsynclk(filesLockedFsync);

        // stop all readers until we're done
        Lock::ParallelBatchWriterMode pbwm;

        applyOps(writerVectors, applyFunc);
    }
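
A minimal sketch of the partitioning step fillWriterVectors performs, assuming operations are routed by a hash of their namespace so that writes to one collection stay on a single thread and keep their relative order (the real implementation may also consider the document _id):

    // Hypothetical sketch of routing ops to writer threads by namespace hash.
    void fillWriterVectorsSketch(const std::deque<BSONObj>& ops,
                                 std::vector<std::vector<BSONObj>>* writerVectors) {
        for (const BSONObj& op : ops) {
            const size_t idx =
                std::hash<std::string>()(op["ns"].str()) % writerVectors->size();
            (*writerVectors)[idx].push_back(op);
        }
    }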
Example no. 5
// Applies a batch of oplog entries by using a set of threads to apply the operations,
// and then writes the oplog entries to the local oplog.
OpTime SyncTail::multiApply(OperationContext* txn, const OpQueue& ops) {
    invariant(_applyFunc);

    if (getGlobalServiceContext()->getGlobalStorageEngine()->isMmapV1()) {
        // Use a ThreadPool to prefetch all the operations in a batch.
        prefetchOps(ops.getDeque(), &_prefetcherPool);
    }

    std::vector<std::vector<BSONObj>> writerVectors(replWriterThreadCount);

    fillWriterVectors(txn, ops.getDeque(), &writerVectors);
    LOG(2) << "replication batch size is " << ops.getDeque().size() << endl;
    // We must grab this because we're going to grab write locks later.
    // We hold this mutex the entire time we're writing; it doesn't matter
    // because all readers are blocked anyway.
    stdx::lock_guard<SimpleMutex> fsynclk(filesLockedFsync);

    // stop all readers until we're done
    Lock::ParallelBatchWriterMode pbwm(txn->lockState());

    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    if (replCoord->getMemberState().primary() && !replCoord->isWaitingForApplierToDrain()) {
        severe() << "attempting to replicate ops while primary";
        fassertFailed(28527);
    }

    applyOps(writerVectors, &_writerPool, _applyFunc, this);

    OpTime lastOpTime;
    {
        ON_BLOCK_EXIT([&] { _writerPool.join(); });
        std::vector<BSONObj> raws;
        raws.reserve(ops.getDeque().size());
        for (auto&& op : ops.getDeque()) {
            raws.emplace_back(op.raw);
        }
        lastOpTime = writeOpsToOplog(txn, raws);
        if (inShutdown()) {
            return OpTime();
        }
    }
    // We have now written all database writes and updated the oplog to match.
    return lastOpTime;
}
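
The ON_BLOCK_EXIT above is a scope guard: _writerPool.join() runs when the enclosing block is exited on any path, including the early return taken when inShutdown() is true. A minimal sketch of the idiom, not MongoDB's actual implementation:

// Hypothetical minimal scope guard: the callable runs in the destructor,
// so cleanup happens on normal exit, early return, or exception unwind.
template <typename F>
struct ScopeGuardSketch {
    F cleanup;
    ~ScopeGuardSketch() { cleanup(); }
};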
Example no. 6
 // find all oplog entries for a given OID in the oplog.refs collection and apply them
 // TODO: this should be a range query on oplog.refs where _id.oid == oid, with applyOps
 // run on each entry found. The locking of the query interleaved with the locking in
 // applyOps did not work, so it is a sequence of point queries instead.
 // TODO: verify that the query plan is an indexed lookup.
 // TODO: verify that the query plan does not fetch too many docs and then process only one of them.
 void applyRefOp(BSONObj entry) {
     OID oid = entry["ref"].OID();
     LOG(3) << "apply ref " << entry << " oid " << oid << endl;
     long long seq = 0; // note that 0 is smaller than any of the seq numbers
     while (1) {
         BSONObj entry;
         {
             Client::ReadContext ctx(rsOplogRefs);
             // TODO: Should this be using rsOplogRefsDetails, verifying non-null?
             NamespaceDetails *d = nsdetails(rsOplogRefs);
             if (d == NULL || !d->findOne(BSON("_id" << BSON("$gt" << BSON("oid" << oid << "seq" << seq))), entry, true)) {
                 break;
             }
         }
         BSONElement e = entry.getFieldDotted("_id.seq");
         seq = e.Long();
         BSONElement eOID = entry.getFieldDotted("_id.oid");
         if (oid != eOID.OID()) {
             break;
         }
         LOG(3) << "apply " << entry << " seq=" << seq << endl;
         applyOps(entry["ops"].Array());
     }
 }