Ejemplo n.º 1
0
/**
 * Creates a create collection oplog entry with given optime.
 */
OplogEntry makeCreateCollectionOplogEntry(OpTime opTime,
                                          const NamespaceString& nss = NamespaceString("test.t")) {
    BSONObjBuilder bob;
    bob.appendElements(opTime.toBSON());
    bob.append("h", 1LL);
    bob.append("op", "c");
    bob.append("ns", nss.getCommandNS());
    bob.append("o", BSON("create" << nss.coll()));
    return OplogEntry(bob.obj());
}
Ejemplo n.º 2
0
/**
 * Creates an insert oplog entry with given optime and namespace.
 */
OplogEntry makeInsertDocumentOplogEntry(OpTime opTime,
                                        const NamespaceString& nss,
                                        const BSONObj& documentToInsert) {
    BSONObjBuilder bob;
    bob.appendElements(opTime.toBSON());
    bob.append("h", 1LL);
    bob.append("op", "i");
    bob.append("ns", nss.ns());
    bob.append("o", documentToInsert);
    return OplogEntry(bob.obj());
}
Ejemplo n.º 3
0
/**
 * Creates an update oplog entry with given optime and namespace.
 */
OplogEntry makeUpdateDocumentOplogEntry(OpTime opTime,
                                        const NamespaceString& nss,
                                        const BSONObj& documentToUpdate,
                                        const BSONObj& updatedDocument) {
    BSONObjBuilder bob;
    bob.appendElements(opTime.toBSON());
    bob.append("h", 1LL);
    bob.append("op", "u");
    bob.append("ns", nss.ns());
    bob.append("o2", documentToUpdate);
    bob.append("o", updatedDocument);
    return OplogEntry(bob.obj());
}
Ejemplo n.º 4
0
void ReplicationRecoveryImpl::_applyToEndOfOplog(OperationContext* opCtx,
                                                 const Timestamp& oplogApplicationStartPoint,
                                                 const Timestamp& topOfOplog) {
    invariant(!oplogApplicationStartPoint.isNull());
    invariant(!topOfOplog.isNull());

    // Check if we have any unapplied ops in our oplog. It is important that this is done after
    // deleting the ragged end of the oplog.
    if (oplogApplicationStartPoint == topOfOplog) {
        log() << "No oplog entries to apply for recovery. Start point is at the top of the oplog.";
        return;  // We've applied all the valid oplog we have.
    } else if (oplogApplicationStartPoint > topOfOplog) {
        severe() << "Applied op " << oplogApplicationStartPoint.toBSON()
                 << " not found. Top of oplog is " << topOfOplog.toBSON() << '.';
        fassertFailedNoTrace(40313);
    }

    log() << "Replaying stored operations from " << oplogApplicationStartPoint.toBSON()
          << " (exclusive) to " << topOfOplog.toBSON() << " (inclusive).";

    OplogBufferLocalOplog oplogBuffer(oplogApplicationStartPoint);
    oplogBuffer.startup(opCtx);

    RecoveryOplogApplierStats stats;

    auto writerPool = OplogApplier::makeWriterPool();
    OplogApplier::Options options;
    options.allowNamespaceNotFoundErrorsOnCrudOps = true;
    options.skipWritesToOplog = true;
    // During replication recovery, the stableTimestampForRecovery refers to the stable timestamp
    // from which we replay the oplog.
    // For startup recovery, this will be the recovery timestamp, which is the stable timestamp that
    // the storage engine recovered to on startup. For rollback recovery, this will be the last
    // stable timestamp, returned when we call recoverToStableTimestamp.
    // We keep track of this for prepared transactions so that when we apply a commitTransaction
    // oplog entry, we can check if it occurs before or after the stable timestamp and decide
    // whether the operations would have already been reflected in the data.
    options.stableTimestampForRecovery = oplogApplicationStartPoint;
    OplogApplierImpl oplogApplier(nullptr,
                                  &oplogBuffer,
                                  &stats,
                                  nullptr,
                                  _consistencyMarkers,
                                  _storageInterface,
                                  options,
                                  writerPool.get());

    OplogApplier::BatchLimits batchLimits;
    batchLimits.bytes = OplogApplier::calculateBatchLimitBytes(opCtx, _storageInterface);
    batchLimits.ops = OplogApplier::getBatchLimitOperations();

    OpTime applyThroughOpTime;
    OplogApplier::Operations batch;
    while (
        !(batch = fassert(50763, oplogApplier.getNextApplierBatch(opCtx, batchLimits))).empty()) {
        applyThroughOpTime = uassertStatusOK(oplogApplier.multiApply(opCtx, std::move(batch)));
    }
    stats.complete(applyThroughOpTime);
    invariant(oplogBuffer.isEmpty(),
              str::stream() << "Oplog buffer not empty after applying operations. Last operation "
                               "applied with optime: "
                            << applyThroughOpTime.toBSON());
    invariant(applyThroughOpTime.getTimestamp() == topOfOplog,
              str::stream() << "Did not apply to top of oplog. Applied through: "
                            << applyThroughOpTime.toString()
                            << ". Top of oplog: "
                            << topOfOplog.toString());
    oplogBuffer.shutdown(opCtx);

    // We may crash before setting appliedThrough. If we have a stable checkpoint, we will recover
    // to that checkpoint at a replication consistent point, and applying the oplog is safe.
    // If we don't have a stable checkpoint, then we must be in startup recovery, and not rollback
    // recovery, because we only roll back to a stable timestamp when we have a stable checkpoint.
    // Startup recovery from an unstable checkpoint only ever applies a single batch and it is safe
    // to replay the batch from any point.
    _consistencyMarkers->setAppliedThrough(opCtx, applyThroughOpTime);
}