static void runUpdateFromOplogWithLock(const char* ns, BSONObj op, bool isRollback) { NamespaceDetails* nsd = nsdetails(ns); const char *names[] = { KEY_STR_PK, KEY_STR_OLD_ROW, KEY_STR_NEW_ROW }; BSONElement fields[3]; op.getFields(3, names, fields); BSONObj pk = fields[0].Obj(); BSONObj oldRow = fields[1].Obj(); BSONObj newRow = fields[2].Obj(); // note the only difference between these two cases is // what is passed as the before image, and what is passed // as after. In normal replication, we replace oldRow with newRow. // In rollback, we replace newRow with oldRow uint64_t flags = (NamespaceDetails::NO_UNIQUE_CHECKS | NamespaceDetails::NO_LOCKTREE); if (isRollback) { // if this is a rollback, then the newRow is what is in the // collections, that we want to replace with oldRow updateOneObject(nsd, pk, newRow, oldRow, NULL, flags); } else { // normal replication case updateOneObject(nsd, pk, oldRow, newRow, NULL, flags); } }
/**
 * Apply a single replicated operation while the write lock is held.
 * @param fromRepl false if from ApplyOpsCmd (affects which op counters bump)
 * @return true if an update should have happened and the document DNE;
 *         see replset initial sync code.
 */
bool applyOperation_inlock(const BSONObj& op, bool fromRepl, bool convertUpdateToUpsert) {
    LOG(6) << "applying op: " << op << endl;
    bool failedUpdate = false;
    // Repl-applied ops are counted separately from direct client traffic.
    OpCounters * opCounters = fromRepl ? &replOpCounters : &globalOpCounters;
    // Pull the standard oplog-entry fields in one pass:
    // o = object payload, ns = namespace, op = type char, b = boolean flag.
    const char *names[] = { "o", "ns", "op", "b" };
    BSONElement fields[4];
    op.getFields(4, names, fields);
    BSONObj o;
    if( fields[0].isABSONObj() )
        o = fields[0].embeddedObject();
    const char *ns = fields[1].valuestrsafe();
    Lock::assertWriteLocked(ns);
    NamespaceDetails *nsd = nsdetails(ns);
    // operation type -- see logOp() comments for types
    const char *opType = fields[2].valuestrsafe();
    if ( *opType == 'i' ) {
        opCounters->gotInsert();
        const char *p = strchr(ns, '.');
        if ( p && strcmp(p, ".system.indexes") == 0 ) {
            // updates aren't allowed for indexes -- so we will do a regular insert. if index already
            // exists, that is ok.
            theDataFileMgr.insert(ns, (void*) o.objdata(), o.objsize());
        }
        else {
            // do upserts for inserts as we might get replayed more than once
            OpDebug debug;
            BSONElement _id;
            if( !o.getObjectID(_id) ) {
                /* No _id. This will be very slow. */
                // Upsert matching the whole document -- a collection scan.
                Timer t;
                updateObjectsForReplication(ns, o, o, true, false, false, debug, false,
                                            QueryPlanSelectionPolicy::idElseNatural() );
                if( t.millis() >= 2 ) {
                    RARELY OCCASIONALLY log() << "warning, repl doing slow updates (no _id field) for " << ns << endl;
                }
            }
            else {
                // probably don't need this since all replicated colls have _id indexes now
                // but keep it just in case
                RARELY if ( nsd && !nsd->isCapped() ) {
                    ensureHaveIdIndex(ns);
                }
                /* todo : it may be better to do an insert here, and then catch the dup key exception and do update
                          then. very few upserts will not be inserts...
                */
                // Upsert by _id so the operation is idempotent on replay.
                BSONObjBuilder b;
                b.append(_id);
                updateObjectsForReplication(ns, o, b.done(), true, false, false , debug, false,
                                            QueryPlanSelectionPolicy::idElseNatural() );
            }
        }
    }
/**
 * Apply a single replicated operation (older variant; asserts the global
 * write lock rather than a per-namespace lock).
 * @param fromRepl false if from ApplyOpsCmd (affects which op counters bump)
 * @return true if an update should have happened and the document DNE;
 *         see replset initial sync code.
 */
bool applyOperation_inlock(const BSONObj& op , bool fromRepl ) {
    assertInWriteLock();
    LOG(6) << "applying op: " << op << endl;
    bool failedUpdate = false;
    // Repl-applied ops are counted separately from direct client traffic.
    OpCounters * opCounters = fromRepl ? &replOpCounters : &globalOpCounters;
    // Pull the standard oplog-entry fields in one pass:
    // o = object payload, ns = namespace, op = type char, b = boolean flag.
    const char *names[] = { "o", "ns", "op", "b" };
    BSONElement fields[4];
    op.getFields(4, names, fields);
    BSONObj o;
    if( fields[0].isABSONObj() )
        o = fields[0].embeddedObject();
    const char *ns = fields[1].valuestrsafe();
    NamespaceDetails *nsd = nsdetails(ns);
    // operation type -- see logOp() comments for types
    const char *opType = fields[2].valuestrsafe();
    if ( *opType == 'i' ) {
        opCounters->gotInsert();
        const char *p = strchr(ns, '.');
        if ( p && strcmp(p, ".system.indexes") == 0 ) {
            // updates aren't allowed for indexes -- so we will do a regular insert. if index already
            // exists, that is ok.
            theDataFileMgr.insert(ns, (void*) o.objdata(), o.objsize());
        }
        else {
            // do upserts for inserts as we might get replayed more than once
            OpDebug debug;
            BSONElement _id;
            if( !o.getObjectID(_id) ) {
                /* No _id. This will be very slow. */
                // Upsert matching the whole document -- a collection scan.
                Timer t;
                updateObjects(ns, o, o, true, false, false, debug );
                if( t.millis() >= 2 ) {
                    RARELY OCCASIONALLY log() << "warning, repl doing slow updates (no _id field) for " << ns << endl;
                }
            }
            else {
                /* erh 10/16/2009 - this is probably not relevant any more since its auto-created, but not worth removing */
                RARELY if (nsd && !nsd->capped) {
                    ensureHaveIdIndex(ns); // otherwise updates will be slow
                }
                /* todo : it may be better to do an insert here, and then catch the dup key exception and do update
                          then. very few upserts will not be inserts...
                */
                // Upsert by _id so the operation is idempotent on replay.
                BSONObjBuilder b;
                b.append(_id);
                updateObjects(ns, o, b.done(), true, false, false , debug );
            }
        }
    }
void applyOperationFromOplog(const BSONObj& op) { LOG(6) << "applying op: " << op << endl; OpCounters* opCounters = &replOpCounters; const char *names[] = { KEY_STR_NS, KEY_STR_OP_NAME }; BSONElement fields[2]; op.getFields(2, names, fields); const char* ns = fields[0].valuestrsafe(); const char* opType = fields[1].valuestrsafe(); if (strcmp(opType, OP_STR_INSERT) == 0) { opCounters->gotInsert(); runInsertFromOplog(ns, op); } else if (strcmp(opType, OP_STR_UPDATE) == 0) { opCounters->gotUpdate(); runUpdateFromOplog(ns, op, false); } else if (strcmp(opType, OP_STR_DELETE) == 0) { opCounters->gotDelete(); runDeleteFromOplog(ns, op); } else if (strcmp(opType, OP_STR_COMMAND) == 0) { opCounters->gotCommand(); runCommandFromOplog(ns, op); } else if (strcmp(opType, OP_STR_COMMENT) == 0) { // no-op } else if (strcmp(opType, OP_STR_CAPPED_INSERT) == 0) { opCounters->gotInsert(); runCappedInsertFromOplog(ns, op); } else if (strcmp(opType, OP_STR_CAPPED_DELETE) == 0) { opCounters->gotDelete(); runCappedDeleteFromOplog(ns, op); } else { throw MsgAssertionException( 14825 , ErrorMsg("error in applyOperation : unknown opType ", *opType) ); } }
void rollbackOperationFromOplog(const BSONObj& op) { LOG(6) << "rolling back op: " << op << endl; const char *names[] = { KEY_STR_NS, KEY_STR_OP_NAME }; BSONElement fields[2]; op.getFields(2, names, fields); const char* ns = fields[0].valuestrsafe(); const char* opType = fields[1].valuestrsafe(); if (strcmp(opType, OP_STR_INSERT) == 0) { runRollbackInsertFromOplog(ns, op); } else if (strcmp(opType, OP_STR_UPDATE) == 0) { runUpdateFromOplog(ns, op, true); } else if (strcmp(opType, OP_STR_DELETE) == 0) { // the rollback of a delete is to do the insert runInsertFromOplog(ns, op); } else if (strcmp(opType, OP_STR_COMMAND) == 0) { rollbackCommandFromOplog(ns, op); } else if (strcmp(opType, OP_STR_COMMENT) == 0) { // no-op } else if (strcmp(opType, OP_STR_CAPPED_INSERT) == 0) { runCappedDeleteFromOplog(ns, op); } else if (strcmp(opType, OP_STR_CAPPED_DELETE) == 0) { runCappedInsertFromOplog(ns, op); } else { throw MsgAssertionException( 16795 , ErrorMsg("error in applyOperation : unknown opType ", *opType) ); } }
// Replays one oplog document against the destination connection (_conn).
// Returns false on a malformed entry or a replay failure (which stops the
// sync loop); returns true when the op was applied, buffered, or safely
// ignored. On success, advances _maxOpTimeSynced to this op's timestamp.
// Inserts are batched via pushInsert/_insertBuf and flushed before any
// non-insert op so ordering is preserved.
bool processObj(const BSONObj &obj) {
    if (obj.hasField("$err")) {
        log() << "error getting oplog: " << obj << endl;
        return false;
    }
    // Standard oplog fields: ts = optime, op = type, ns = namespace,
    // o = payload, b = boolean (upsert for 'u', justOne for 'd').
    static const char *names[] = {"ts", "op", "ns", "o", "b"};
    BSONElement fields[5];
    obj.getFields(5, names, fields);
    BSONElement &tsElt = fields[0];
    if (!tsElt.ok()) {
        log() << "oplog format error: " << obj << " missing 'ts' field." << endl;
        return false;
    }
    if (tsElt.type() != Date && tsElt.type() != Timestamp) {
        log() << "oplog format error: " << obj << " wrong 'ts' field type." << endl;
        return false;
    }
    // Remember this op's time; committed into _maxOpTimeSynced only after
    // the op is fully applied.
    _thisTime = OpTime(tsElt.date());
    BSONElement &opElt = fields[1];
    if (!opElt.ok()) {
        log() << "oplog format error: " << obj << " missing 'op' field." << endl;
        return false;
    }
    StringData op = opElt.Stringdata();
    // nop
    if (op == "n") {
        return true;
    }
    // "presence of a database"
    if (op == "db") {
        return true;
    }
    if (op != "c" && op != "i" && op != "u" && op != "d") {
        log() << "oplog format error: " << obj << " has an invalid 'op' field of '" << op << "'." << endl;
        return false;
    }
    // Any non-insert op must observe all previously buffered inserts.
    if (op != "i" && !_insertBuf.empty()) {
        flushInserts();
    }
    BSONElement &nsElt = fields[2];
    if (!nsElt.ok()) {
        log() << "oplog format error: " << obj << " missing 'ns' field." << endl;
        return false;
    }
    StringData ns = nsElt.Stringdata();
    size_t i = ns.find('.');
    if (i == string::npos) {
        log() << "oplog format error: invalid namespace '" << ns << "' in op " << obj << "." << endl;
        return false;
    }
    StringData dbname = ns.substr(0, i);
    StringData collname = ns.substr(i + 1);
    BSONElement &oElt = fields[3];
    if (!oElt.ok()) {
        log() << "oplog format error: " << obj << " missing 'o' field." << endl;
        return false;
    }
    // NOTE(review): re-looks-up "o" instead of using oElt just validated
    // above -- redundant but harmless; confirm before simplifying.
    BSONObj o = obj["o"].Obj();
    if (op == "c") {
        if (collname != "$cmd") {
            log() << "oplog format error: invalid namespace '" << ns << "' for command in op " << obj << "." << endl;
            return false;
        }
        BSONObj info;
        bool ok = _conn.runCommand(dbname.toString(), o, info);
        if (!ok) {
            StringData fieldName = o.firstElementFieldName();
            BSONElement errmsgElt = info["errmsg"];
            StringData errmsg = errmsgElt.type() == String ? errmsgElt.Stringdata() : "";
            bool isDropIndexes = (fieldName == "dropIndexes" || fieldName == "deleteIndexes");
            // Dropping something that doesn't exist on the destination is
            // tolerated; any other command failure aborts the replay.
            if (((fieldName == "drop" || isDropIndexes) && errmsg == "ns not found") ||
                (isDropIndexes && (errmsg == "index not found" ||
                                   errmsg.find("can't find index with key:") == 0))) {
                // This is actually ok. We don't mind dropping something that's not there.
                LOG(1) << "Tried to replay " << o << ", got " << info << ", ignoring." << endl;
            }
            else {
                log() << "replay of command " << o << " failed: " << info << endl;
                return false;
            }
        }
    }
    else {
        string nsstr = ns.toString();
        if (op == "i") {
            if (collname == "system.indexes") {
                // Can't ensure multiple indexes in the same batch.
                flushInserts();
                // For now, we need to strip out any background fields from
                // ensureIndex.  Once we do hot indexing we can do something more
                // like what vanilla applyOperation_inlock does.
                if (o["background"].trueValue()) {
                    BSONObjBuilder builder;
                    BSONObjIterator it(o);
                    while (it.more()) {
                        BSONElement e = it.next();
                        // sizeof("background") includes the NUL, so this
                        // matches the exact field name only.
                        if (strncmp(e.fieldName(), "background", sizeof("background")) != 0) {
                            builder.append(e);
                        }
                    }
                    o = builder.obj();
                }
                // We need to warn very carefully about dropDups.
                if (o["dropDups"].trueValue()) {
                    // NOTE(review): this stripped spec ('builder') is never
                    // assigned back to 'o' (unlike the background case above),
                    // so the insert below still carries dropDups -- confirm
                    // whether 'o = builder.obj();' was intended here.
                    BSONObjBuilder builder;
                    BSONObjIterator it(o);
                    while (it.more()) {
                        BSONElement e = it.next();
                        if (strncmp(e.fieldName(), "dropDups", sizeof("dropDups")) != 0) {
                            builder.append(e);
                        }
                    }
                    warning() << "Detected an ensureIndex with dropDups: true in " << o << "." << endl;
                    warning() << "This option is not supported in TokuMX, because it deletes arbitrary data." << endl;
                    warning() << "If it were replayed, it could result in a completely different data set than the source database." << endl;
                    warning() << "We will attempt to replay it without dropDups, but if that fails, you must restart your migration process." << endl;
                    // Insert eagerly so the error can be checked immediately;
                    // a failure here is fatal to the migration.
                    _conn.insert(nsstr, o);
                    string err = _conn.getLastError(dbname.toString(), false, false);
                    if (!err.empty()) {
                        log() << "replay of operation " << obj << " failed: " << err << endl;
                        warning() << "You cannot continue processing this replication stream.  You need to restart the migration process." << endl;
                        _running = false;
                        _logAtExit = false;
                        return true;
                    }
                    // NOTE(review): on success control falls through to
                    // pushInsert below, queuing the same spec a second time --
                    // likely benign for system.indexes but verify.
                }
            }
            pushInsert(nsstr, o);
            // Don't call GLE or update _maxOpTimeSynced yet.
            _thisTime = OpTime();
            return true;
        }
        else if (op == "u") {
            BSONElement o2Elt = obj["o2"];
            if (!o2Elt.ok()) {
                log() << "oplog format error: " << obj << " missing 'o2' field." << endl;
                return false;
            }
            BSONElement &bElt = fields[4];
            bool upsert = bElt.booleanSafe();
            // o2 is the query/criteria; o is the update spec.
            BSONObj o2 = o2Elt.Obj();
            _conn.update(nsstr, o2, o, upsert, false);
        }
        else if (op == "d") {
            BSONElement &bElt = fields[4];
            bool justOne = bElt.booleanSafe();
            _conn.remove(nsstr, o, justOne);
        }
        // Synchronous error check for the update/delete just issued.
        string err = _conn.getLastError(dbname.toString(), false, false);
        if (!err.empty()) {
            log() << "replay of operation " << obj << " failed: " << err << endl;
            return false;
        }
    }
    // If we got here, we completed the operation successfully.
    _maxOpTimeSynced = _thisTime;
    _thisTime = OpTime();
    return true;
}
// Apply a single oplog entry while the global write lock is held.
// Dispatches on the one-character op type ('i'/'u'/'d'/'c'/'n'); inserts
// are replayed as upserts so re-application is idempotent.
void applyOperation_inlock(const BSONObj& op , bool fromRepl ) {
    assertInWriteLock();
    LOG(6) << "applying op: " << op << endl;
    // Repl-applied ops are counted separately from direct client traffic.
    OpCounters * opCounters = fromRepl ? &replOpCounters : &globalOpCounters;
    // o = object payload, ns = namespace, op = type char, b = boolean flag.
    const char *names[] = { "o", "ns", "op", "b" };
    BSONElement fields[4];
    op.getFields(4, names, fields);
    BSONObj o;
    if( fields[0].isABSONObj() )
        o = fields[0].embeddedObject();
    const char *ns = fields[1].valuestrsafe();
    // operation type -- see logOp() comments for types
    const char *opType = fields[2].valuestrsafe();
    if ( *opType == 'i' ) {
        opCounters->gotInsert();
        const char *p = strchr(ns, '.');
        if ( p && strcmp(p, ".system.indexes") == 0 ) {
            // updates aren't allowed for indexes -- so we will do a regular insert. if index already
            // exists, that is ok.
            theDataFileMgr.insert(ns, (void*) o.objdata(), o.objsize());
        }
        else {
            // do upserts for inserts as we might get replayed more than once
            OpDebug debug;
            BSONElement _id;
            if( !o.getObjectID(_id) ) {
                /* No _id. This will be very slow. */
                // Upsert matching the whole document -- a collection scan.
                Timer t;
                updateObjects(ns, o, o, true, false, false, debug );
                if( t.millis() >= 2 ) {
                    RARELY OCCASIONALLY log() << "warning, repl doing slow updates (no _id field) for " << ns << endl;
                }
            }
            else {
                /* erh 10/16/2009 - this is probably not relevant any more since its auto-created, but not worth removing */
                RARELY ensureHaveIdIndex(ns); // otherwise updates will be slow
                /* todo : it may be better to do an insert here, and then catch the dup key exception and do update
                          then. very few upserts will not be inserts...
                */
                // Upsert by _id so the operation is idempotent on replay.
                BSONObjBuilder b;
                b.append(_id);
                updateObjects(ns, o, b.done(), true, false, false , debug );
            }
        }
    }
    else if ( *opType == 'u' ) {
        opCounters->gotUpdate();
        RARELY ensureHaveIdIndex(ns); // otherwise updates will be super slow
        OpDebug debug;
        // o2 is the query; o is the update spec; 'b' carries the upsert flag.
        updateObjects(ns, o, op.getObjectField("o2"), /*upsert*/ fields[3].booleanSafe(), /*multi*/ false, /*logop*/ false , debug );
    }
    else if ( *opType == 'd' ) {
        opCounters->gotDelete();
        // Only a bare 'd' is a delete; "db" is a presence advertisement.
        if ( opType[1] == 0 )
            deleteObjects(ns, o, /*justOne*/ fields[3].booleanSafe());
        else
            assert( opType[1] == 'b' ); // "db" advertisement
    }
    else if ( *opType == 'c' ) {
        opCounters->gotCommand();
        BufBuilder bb;
        BSONObjBuilder ob;
        // Replay the command; the reply built into bb/ob is discarded.
        _runCommands(ns, o, bb, ob, true, 0);
    }
    else if ( *opType == 'n' ) {
        // no op
    }
    else {
        throw MsgAssertionException( 14825 , ErrorMsg("error in applyOperation : unknown opType ", *opType) );
    }
}
/**
 * Apply a single replicated operation against 'db' under 'txn'.
 * @param fromRepl false if from ApplyOpsCmd (affects which op counters bump)
 * @return true if an update should have happened and the document DNE;
 *         see replset initial sync code.
 */
bool applyOperation_inlock(OperationContext* txn,
                           Database* db,
                           const BSONObj& op,
                           bool fromRepl,
                           bool convertUpdateToUpsert) {
    LOG(3) << "applying op: " << op << endl;
    bool failedUpdate = false;
    // Repl-applied ops are counted separately from direct client traffic.
    OpCounters * opCounters = fromRepl ? &replOpCounters : &globalOpCounters;
    // o = payload, ns = namespace, op = type char, b = boolean flag,
    // o2 = update query criteria.
    const char *names[] = { "o", "ns", "op", "b", "o2" };
    BSONElement fields[5];
    op.getFields(5, names, fields);
    BSONElement& fieldO = fields[0];
    BSONElement& fieldNs = fields[1];
    BSONElement& fieldOp = fields[2];
    BSONElement& fieldB = fields[3];
    BSONElement& fieldO2 = fields[4];
    BSONObj o;
    if( fieldO.isABSONObj() )
        o = fieldO.embeddedObject();
    const char *ns = fieldNs.valuestrsafe();
    BSONObj o2;
    if (fieldO2.isABSONObj())
        o2 = fieldO2.Obj();
    bool valueB = fieldB.booleanSafe();
    txn->lockState()->assertWriteLocked(ns);
    Collection* collection = db->getCollection( txn, ns );
    IndexCatalog* indexCatalog = collection == NULL ? NULL : collection->getIndexCatalog();
    // operation type -- see logOp() comments for types
    const char *opType = fieldOp.valuestrsafe();
    if ( *opType == 'i' ) {
        opCounters->gotInsert();
        const char *p = strchr(ns, '.');
        if ( p && nsToCollectionSubstring( p ) == "system.indexes" ) {
            // An insert into system.indexes is an index build.
            if (o["background"].trueValue()) {
                IndexBuilder* builder = new IndexBuilder(o);
                // This spawns a new thread and returns immediately.
                builder->go();
            }
            else {
                IndexBuilder builder(o);
                Status status = builder.buildInForeground(txn, db);
                if ( status.isOK() ) {
                    // yay
                }
                else if ( status.code() == ErrorCodes::IndexOptionsConflict ||
                          status.code() == ErrorCodes::IndexKeySpecsConflict ) {
                    // SERVER-13206, SERVER-13496
                    // 2.4 (and earlier) will add an ensureIndex to an oplog if its ok or not
                    // so in 2.6+ where we do stricter validation, it will fail
                    // but we shouldn't care as the primary is responsible
                    warning() << "index creation attempted on secondary that conflicts, "
                              << "skipping: " << status;
                }
                else {
                    uassertStatusOK( status );
                }
            }
        }
        else {
            // do upserts for inserts as we might get replayed more than once
            OpDebug debug;
            BSONElement _id;
            if( !o.getObjectID(_id) ) {
                /* No _id. This will be very slow. */
                // Upsert matching the whole document -- a collection scan.
                Timer t;
                const NamespaceString requestNs(ns);
                UpdateRequest request(txn, requestNs);
                request.setQuery(o);
                request.setUpdates(o);
                request.setUpsert();
                request.setFromReplication();
                UpdateLifecycleImpl updateLifecycle(true, requestNs);
                request.setLifecycle(&updateLifecycle);
                update(db, request, &debug);
                if( t.millis() >= 2 ) {
                    RARELY OCCASIONALLY log() << "warning, repl doing slow updates (no _id field) for " << ns << endl;
                }
            }
            else {
                // probably don't need this since all replicated colls have _id indexes now
                // but keep it just in case
                RARELY if ( indexCatalog
                            && !collection->isCapped()
                            && !indexCatalog->haveIdIndex(txn) ) {
                    try {
                        Helpers::ensureIndex(txn, collection, BSON("_id" << 1), true, "_id_");
                    }
                    catch (const DBException& e) {
                        // Best-effort: a failed _id index build only costs speed.
                        warning() << "Ignoring error building id index on " << collection->ns()
                                  << ": " << e.toString();
                    }
                }
                /* todo : it may be better to do an insert here, and then catch the dup key exception and do update
                          then. very few upserts will not be inserts...
                */
                // Upsert by _id so the operation is idempotent on replay.
                BSONObjBuilder b;
                b.append(_id);
                const NamespaceString requestNs(ns);
                UpdateRequest request(txn, requestNs);
                request.setQuery(b.done());
                request.setUpdates(o);
                request.setUpsert();
                request.setFromReplication();
                UpdateLifecycleImpl updateLifecycle(true, requestNs);
                request.setLifecycle(&updateLifecycle);
                update(db, request, &debug);
            }
        }
    }
/**
 * Apply a single replicated operation while the write lock is held
 * (2.4-era variant using IndexBuilder and UpdateRequest).
 * @param fromRepl false if from ApplyOpsCmd (affects which op counters bump)
 * @return true if an update should have happened and the document DNE;
 *         see replset initial sync code.
 */
bool applyOperation_inlock(const BSONObj& op, bool fromRepl, bool convertUpdateToUpsert) {
    LOG(3) << "applying op: " << op << endl;
    bool failedUpdate = false;
    // Repl-applied ops are counted separately from direct client traffic.
    OpCounters * opCounters = fromRepl ? &replOpCounters : &globalOpCounters;
    // o = object payload, ns = namespace, op = type char, b = boolean flag.
    const char *names[] = { "o", "ns", "op", "b" };
    BSONElement fields[4];
    op.getFields(4, names, fields);
    BSONObj o;
    if( fields[0].isABSONObj() )
        o = fields[0].embeddedObject();
    const char *ns = fields[1].valuestrsafe();
    Lock::assertWriteLocked(ns);
    NamespaceDetails *nsd = nsdetails(ns);
    // operation type -- see logOp() comments for types
    const char *opType = fields[2].valuestrsafe();
    if ( *opType == 'i' ) {
        opCounters->gotInsert();
        const char *p = strchr(ns, '.');
        if ( p && strcmp(p, ".system.indexes") == 0 ) {
            // An insert into system.indexes is an index build.
            if (o["background"].trueValue()) {
                IndexBuilder* builder = new IndexBuilder(ns, o);
                // This spawns a new thread and returns immediately.
                builder->go();
            }
            else {
                IndexBuilder builder(ns, o);
                // Finish the foreground build before returning
                builder.build();
            }
        }
        else {
            // do upserts for inserts as we might get replayed more than once
            OpDebug debug;
            BSONElement _id;
            if( !o.getObjectID(_id) ) {
                /* No _id. This will be very slow. */
                // Upsert matching the whole document -- a collection scan.
                Timer t;
                const NamespaceString requestNs(ns);
                UpdateRequest request(
                    requestNs, debug,
                    QueryPlanSelectionPolicy::idElseNatural());
                request.setQuery(o);
                request.setUpdates(o);
                request.setUpsert();
                request.setFromReplication();
                update(request);
                if( t.millis() >= 2 ) {
                    RARELY OCCASIONALLY log() << "warning, repl doing slow updates (no _id field) for " << ns << endl;
                }
            }
            else {
                // probably don't need this since all replicated colls have _id indexes now
                // but keep it just in case
                RARELY if ( nsd && !nsd->isCapped() ) {
                    ensureHaveIdIndex(ns, false);
                }
                /* todo : it may be better to do an insert here, and then catch the dup key exception and do update then.
                   very few upserts will not be inserts... */
                // Upsert by _id so the operation is idempotent on replay.
                BSONObjBuilder b;
                b.append(_id);
                const NamespaceString requestNs(ns);
                UpdateRequest request(
                    requestNs, debug,
                    QueryPlanSelectionPolicy::idElseNatural());
                request.setQuery(b.done());
                request.setUpdates(o);
                request.setUpsert();
                request.setFromReplication();
                update(request);
            }
        }
    }