// Reads the newest entry in the local oplog and returns its optime.
//
// Returns:
//   - NoMatchingDocument if the oplog has no entries,
//   - NoSuchKey if the newest entry has no "ts" field,
//   - TypeMismatch if "ts" is present but not a BSON Timestamp,
//   - the converted status of any DBException thrown while reading.
StatusWith<OpTime> ReplicationCoordinatorExternalStateImpl::loadLastOpTime(
    OperationContext* txn) {
    // TODO: handle WriteConflictExceptions below
    try {
        BSONObj oplogEntry;
        if (!Helpers::getLast(txn, rsOplogName.c_str(), oplogEntry)) {
            return StatusWith<OpTime>(ErrorCodes::NoMatchingDocument,
                                      str::stream() << "Did not find any entries in "
                                                    << rsOplogName);
        }
        BSONElement tsElement = oplogEntry[tsFieldName];
        if (tsElement.eoo()) {
            return StatusWith<OpTime>(ErrorCodes::NoSuchKey,
                                      str::stream() << "Most recent entry in " << rsOplogName
                                                    << " missing \"" << tsFieldName
                                                    << "\" field");
        }
        if (tsElement.type() != bsonTimestamp) {
            return StatusWith<OpTime>(ErrorCodes::TypeMismatch,
                                      str::stream() << "Expected type of \"" << tsFieldName
                                                    << "\" in most recent " << rsOplogName
                                                    << " entry to have type Timestamp, but found "
                                                    << typeName(tsElement.type()));
        }
        // TODO(siyuan) add term
        // The entry does not carry a term yet, so term 0 is used here.
        return StatusWith<OpTime>(OpTime(tsElement.timestamp(), 0));
    } catch (const DBException& ex) {
        return StatusWith<OpTime>(ex.toStatus());
    }
}
// Extracts the Timestamp-typed field 'fieldName' from 'object' into '*out'.
// On failure, forwards the status from bsonExtractTypedField and leaves
// '*out' untouched.
Status bsonExtractTimestampField(const BSONObj& object, StringData fieldName, Timestamp* out) {
    BSONElement tsElem;
    const Status extractStatus = bsonExtractTypedField(object, fieldName, bsonTimestamp, &tsElem);
    if (!extractStatus.isOK()) {
        return extractStatus;
    }
    *out = tsElem.timestamp();
    return extractStatus;
}
// Parses a ShardingMetadata out of the "$gleStats" sub-object of 'metadataObj'.
// The sub-object must contain exactly two fields: a Timestamp 'lastOpTime' and
// an ObjectId 'electionId'. Any deviation yields an error status.
StatusWith<ShardingMetadata> ShardingMetadata::readFromMetadata(const BSONObj& metadataObj) {
    BSONElement smElem;
    auto smExtractStatus =
        bsonExtractTypedField(metadataObj, kGLEStatsFieldName, mongo::Object, &smElem);
    if (!smExtractStatus.isOK()) {
        return smExtractStatus;
    }

    // Reject any extra/missing fields up front; the schema is exactly two fields.
    if (smElem.embeddedObject().nFields() != 2) {
        return Status(ErrorCodes::InvalidOptions,
                      str::stream() << "The $gleStats object can only have 2 fields, but got "
                                    << smElem.embeddedObject().toString());
    }

    BSONElement lastOpTimeElem;
    auto lastOpTimeExtractStatus = bsonExtractTypedField(smElem.embeddedObject(),
                                                         kGLEStatsLastOpTimeFieldName,
                                                         mongo::bsonTimestamp,
                                                         &lastOpTimeElem);
    if (!lastOpTimeExtractStatus.isOK()) {
        return lastOpTimeExtractStatus;
    }

    BSONElement lastElectionIdElem;
    auto lastElectionIdExtractStatus = bsonExtractTypedField(smElem.embeddedObject(),
                                                             kGLEStatsElectionIdFieldName,
                                                             mongo::jstOID,
                                                             &lastElectionIdElem);
    if (!lastElectionIdExtractStatus.isOK()) {
        return lastElectionIdExtractStatus;
    }

    return ShardingMetadata(lastOpTimeElem.timestamp(), lastElectionIdElem.OID());
}
/**
 * data and len must be the arguments from RecordStore::insert() on an oplog collection.
 *
 * Derives the RecordId key for an oplog document from its "ts" Timestamp field.
 * Returns BadValue if the field is absent or not a Timestamp.
 */
StatusWith<RecordId> extractKey(const char* data, int len) {
    DEV invariant(validateBSON(data, len).isOK());

    const BSONObj entry(data);
    const BSONElement tsElem = entry["ts"];
    if (tsElem.eoo()) {
        return StatusWith<RecordId>(ErrorCodes::BadValue, "no ts field");
    }
    if (tsElem.type() != bsonTimestamp) {
        return StatusWith<RecordId>(ErrorCodes::BadValue, "ts must be a Timestamp");
    }
    return keyForOptime(tsElem.timestamp());
}
/**
 * data and len must be the arguments from RecordStore::insert() on an oplog collection.
 *
 * Derives the RecordId key for an oplog document from its "ts" Timestamp field.
 * Returns BadValue if the field is absent or not a Timestamp.
 */
StatusWith<RecordId> extractKey(const char* data, int len) {
    // Use the latest BSON validation version. Oplog entries are allowed to contain decimal data
    // even if decimal is disabled.
    DEV invariant(validateBSON(data, len, BSONVersion::kLatest).isOK());

    const BSONObj entry(data);
    const BSONElement tsElem = entry["ts"];
    if (tsElem.eoo()) {
        return StatusWith<RecordId>(ErrorCodes::BadValue, "no ts field");
    }
    if (tsElem.type() != bsonTimestamp) {
        return StatusWith<RecordId>(ErrorCodes::BadValue, "ts must be a Timestamp");
    }
    return keyForOptime(tsElem.timestamp());
}
// Parses a ShardingMetadata out of the "$gleStats" sub-object of 'metadataObj'.
// Unlike the strict Timestamp-only variant, the 'lastOpTime' field here may be a
// Timestamp, a Date (legacy), or an Object (v1 OpTime with term).
StatusWith<ShardingMetadata> ShardingMetadata::readFromMetadata(const BSONObj& metadataObj) {
    BSONElement smElem;
    auto smExtractStatus =
        bsonExtractTypedField(metadataObj, kGLEStatsFieldName, mongol::Object, &smElem);
    if (!smExtractStatus.isOK()) {
        return smExtractStatus;
    }

    // Reject any extra/missing fields up front; the schema is exactly two fields.
    if (smElem.embeddedObject().nFields() != 2) {
        return Status(ErrorCodes::InvalidOptions,
                      str::stream() << "The $gleStats object can only have 2 fields, but got "
                                    << smElem.embeddedObject().toString());
    }

    repl::OpTime opTime;
    const BSONElement opTimeElement = smElem.embeddedObject()[kGLEStatsLastOpTimeFieldName];
    if (opTimeElement.eoo()) {
        return Status(ErrorCodes::NoSuchKey, "lastOpTime field missing");
    } else if (opTimeElement.type() == bsonTimestamp) {
        // Pre-term (v0) representation: bare Timestamp, no term available.
        opTime = repl::OpTime(opTimeElement.timestamp(), repl::OpTime::kUninitializedTerm);
    } else if (opTimeElement.type() == Date) {
        // Very old senders encoded the optime as a Date; reinterpret its bits.
        opTime = repl::OpTime(Timestamp(opTimeElement.date()), repl::OpTime::kUninitializedTerm);
    } else if (opTimeElement.type() == Object) {
        // v1 representation: {ts: Timestamp, t: term} sub-object.
        Status status = bsonExtractOpTimeField(
            smElem.embeddedObject(), kGLEStatsLastOpTimeFieldName, &opTime);
        if (!status.isOK()) {
            return status;
        }
    } else {
        // NOTE(review): this message mentions "replSetHeartbeat" although this parses
        // $gleStats — looks copy-pasted from the heartbeat parser; confirm intent.
        return Status(ErrorCodes::TypeMismatch,
                      str::stream() << "Expected \"" << kGLEStatsLastOpTimeFieldName
                                    << "\" field in response to replSetHeartbeat "
                                       "command to have type Date or Timestamp, but found type "
                                    << typeName(opTimeElement.type()));
    }

    BSONElement lastElectionIdElem;
    auto lastElectionIdExtractStatus = bsonExtractTypedField(smElem.embeddedObject(),
                                                             kGLEStatsElectionIdFieldName,
                                                             mongol::jstOID,
                                                             &lastElectionIdElem);
    if (!lastElectionIdExtractStatus.isOK()) {
        return lastElectionIdExtractStatus;
    }

    return ShardingMetadata(opTime, lastElectionIdElem.OID());
}
// Parses a ChunkVersion from command BSON, where 'field' holds an array of the
// form [Timestamp(major, minor), epochOID].
StatusWith<ChunkVersion> ChunkVersion::parseFromBSONWithFieldForCommands(const BSONObj& obj,
                                                                         StringData field) {
    BSONElement versionElem;
    Status status = bsonExtractField(obj, field, &versionElem);
    if (!status.isOK())
        return status;

    if (versionElem.type() != Array) {
        return {ErrorCodes::TypeMismatch,
                str::stream() << "Invalid type " << versionElem.type()
                              << " for shardVersion element. Expected an array"};
    }

    BSONObjIterator it(versionElem.Obj());
    if (!it.more())
        return {ErrorCodes::BadValue, "Unexpected empty version"};

    ChunkVersion version;

    // Expect the timestamp
    {
        BSONElement tsPart = it.next();
        if (tsPart.type() != bsonTimestamp)
            return {ErrorCodes::TypeMismatch,
                    str::stream() << "Invalid type " << tsPart.type()
                                  << " for version timestamp part."};

        // The combined (major, minor) pair is packed into the timestamp's 64 bits.
        version._combined = tsPart.timestamp().asULL();
    }

    // Expect the epoch OID
    {
        // NOTE(review): no it.more() check before this next() — presumably relies on the
        // iterator yielding an EOO element past the end (which then fails the jstOID type
        // check below); confirm BSONObjIterator's past-the-end behavior.
        BSONElement epochPart = it.next();
        if (epochPart.type() != jstOID)
            return {ErrorCodes::TypeMismatch,
                    str::stream() << "Invalid type " << epochPart.type()
                                  << " for version epoch part."};

        version._epoch = epochPart.OID();
    }

    return version;
}
// Extracts a Timestamp field described by 'field' from 'elem' into '*out'.
//
// Returns FIELD_NONE when the element is missing and no default exists,
// FIELD_DEFAULT when the missing value was filled from the field's default,
// FIELD_SET on a successful Timestamp extraction, and FIELD_INVALID (with
// '*errMsg' populated) on a type mismatch.
FieldParser::FieldState FieldParser::extract(BSONElement elem,
                                             const BSONField<Timestamp>& field,
                                             Timestamp* out,
                                             string* errMsg) {
    // Absent element: fall back to the declared default, if there is one.
    if (elem.eoo()) {
        if (!field.hasDefault()) {
            return FIELD_NONE;
        }
        *out = field.getDefault();
        return FIELD_DEFAULT;
    }

    // Present but wrong type: report an error.
    if (elem.type() != bsonTimestamp) {
        _genFieldErrMsg(elem, field, "timestamp", errMsg);
        return FIELD_INVALID;
    }

    *out = elem.timestamp();
    return FIELD_SET;
}
std::string runQuery(OperationContext* txn, QueryMessage& q, const NamespaceString& nss, CurOp& curop, Message &result) { // Validate the namespace. uassert(16256, str::stream() << "Invalid ns [" << nss.ns() << "]", nss.isValid()); invariant(!nss.isCommand()); // Set curop information. beginQueryOp(nss, q.query, q.ntoreturn, q.ntoskip, &curop); // Parse the qm into a CanonicalQuery. std::auto_ptr<CanonicalQuery> cq; { CanonicalQuery* cqRaw; Status canonStatus = CanonicalQuery::canonicalize(q, &cqRaw, WhereCallbackReal(txn, nss.db())); if (!canonStatus.isOK()) { uasserted(17287, str::stream() << "Can't canonicalize query: " << canonStatus.toString()); } cq.reset(cqRaw); } invariant(cq.get()); LOG(5) << "Running query:\n" << cq->toString(); LOG(2) << "Running query: " << cq->toStringShort(); // Parse, canonicalize, plan, transcribe, and get a plan executor. AutoGetCollectionForRead ctx(txn, nss); Collection* collection = ctx.getCollection(); const int dbProfilingLevel = ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile; // We have a parsed query. Time to get the execution plan for it. std::unique_ptr<PlanExecutor> exec; { PlanExecutor* rawExec; Status execStatus = getExecutorFind(txn, collection, nss, cq.release(), PlanExecutor::YIELD_AUTO, &rawExec); uassertStatusOK(execStatus); exec.reset(rawExec); } const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed(); // If it's actually an explain, do the explain and return rather than falling through // to the normal query execution loop. if (pq.isExplain()) { BufBuilder bb; bb.skip(sizeof(QueryResult::Value)); BSONObjBuilder explainBob; Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob); // Add the resulting object to the return buffer. BSONObj explainObj = explainBob.obj(); bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize()); // TODO: Does this get overwritten/do we really need to set this twice? 
curop.debug().query = q.query; // Set query result fields. QueryResult::View qr = bb.buf(); bb.decouple(); qr.setResultFlagsToOk(); qr.msgdata().setLen(bb.len()); curop.debug().responseLength = bb.len(); qr.msgdata().setOperation(opReply); qr.setCursorId(0); qr.setStartingFrom(0); qr.setNReturned(1); result.setData(qr.view2ptr(), true); return ""; } // We freak out later if this changes before we're done with the query. const ChunkVersion shardingVersionAtStart = shardingState.getVersion(nss.ns()); // Handle query option $maxTimeMS (not used with commands). curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000); txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point. // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set. bool slaveOK = pq.isSlaveOk() || pq.hasReadPref(); Status serveReadsStatus = repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor( txn, nss, slaveOK); uassertStatusOK(serveReadsStatus); // Run the query. // bb is used to hold query results // this buffer should contain either requested documents per query or // explain information, but not both BufBuilder bb(32768); bb.skip(sizeof(QueryResult::Value)); // How many results have we obtained from the executor? int numResults = 0; // If we're replaying the oplog, we save the last time that we read. Timestamp slaveReadTill; BSONObj obj; PlanExecutor::ExecState state; // uint64_t numMisplacedDocs = 0; // Get summary info about which plan the executor is using. curop.debug().planSummary = Explain::getPlanSummary(exec.get()); while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { // Add result to output buffer. bb.appendBuf((void*)obj.objdata(), obj.objsize()); // Count the result. ++numResults; // Possibly note slave's position in the oplog. 
if (pq.isOplogReplay()) { BSONElement e = obj["ts"]; if (Date == e.type() || bsonTimestamp == e.type()) { slaveReadTill = e.timestamp(); } } if (enoughForFirstBatch(pq, numResults, bb.len())) { LOG(5) << "Enough for first batch, wantMore=" << pq.wantMore() << " numToReturn=" << pq.getNumToReturn() << " numResults=" << numResults << endl; break; } } // If we cache the executor later, we want to deregister it as it receives notifications // anyway by virtue of being cached. // // If we don't cache the executor later, we are deleting it, so it must be deregistered. // // So, no matter what, deregister the executor. exec->deregisterExec(); // Caller expects exceptions thrown in certain cases. if (PlanExecutor::FAILURE == state) { scoped_ptr<PlanStageStats> stats(exec->getStats()); error() << "Plan executor error, stats: " << Explain::statsToBSON(*stats); uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj)); } // TODO: Currently, chunk ranges are kept around until all ClientCursors created while the // chunk belonged on this node are gone. Separating chunk lifetime management from // ClientCursor should allow this check to go away. if (!shardingState.getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) { // if the version changed during the query we might be missing some data and its safe to // send this as mongos can resend at this point throw SendStaleConfigException(nss.ns(), "version changed during initial query", shardingVersionAtStart, shardingState.getVersion(nss.ns())); } // Fill out curop based on query results. If we have a cursorid, we will fill out curop with // this cursorid later. long long ccId = 0; if (shouldSaveCursor(txn, collection, state, exec.get())) { // We won't use the executor until it's getMore'd. exec->saveState(); // Allocate a new ClientCursor. We don't have to worry about leaking it as it's // inserted into a global map by its ctor. 
ClientCursor* cc = new ClientCursor(collection->getCursorManager(), exec.release(), nss.ns(), pq.getOptions(), pq.getFilter()); ccId = cc->cursorid(); if (txn->getClient()->isInDirectClient()) { cc->setUnownedRecoveryUnit(txn->recoveryUnit()); } else if (state == PlanExecutor::IS_EOF && pq.isTailable()) { // Don't stash the RU for tailable cursors at EOF, let them get a new RU on their // next getMore. } else { // We stash away the RecoveryUnit in the ClientCursor. It's used for subsequent // getMore requests. The calling OpCtx gets a fresh RecoveryUnit. txn->recoveryUnit()->abandonSnapshot(); cc->setOwnedRecoveryUnit(txn->releaseRecoveryUnit()); StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine(); invariant(txn->setRecoveryUnit(storageEngine->newRecoveryUnit(), OperationContext::kNotInUnitOfWork) == OperationContext::kNotInUnitOfWork); } LOG(5) << "caching executor with cursorid " << ccId << " after returning " << numResults << " results" << endl; // TODO document if (pq.isOplogReplay() && !slaveReadTill.isNull()) { cc->slaveReadTill(slaveReadTill); } // TODO document if (pq.isExhaust()) { curop.debug().exhaust = true; } cc->setPos(numResults); // If the query had a time limit, remaining time is "rolled over" to the cursor (for // use by future getmore ops). cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros()); endQueryOp(cc->getExecutor(), dbProfilingLevel, numResults, ccId, &curop); } else { LOG(5) << "Not caching executor but returning " << numResults << " results.\n"; endQueryOp(exec.get(), dbProfilingLevel, numResults, ccId, &curop); } // Add the results from the query into the output buffer. result.appendData(bb.buf(), bb.len()); bb.decouple(); // Fill out the output buffer's header. QueryResult::View qr = result.header().view2ptr(); qr.setCursorId(ccId); qr.setResultFlagsToOk(); qr.msgdata().setOperation(opReply); qr.setStartingFrom(0); qr.setNReturned(numResults); // curop.debug().exhaust is set above. 
return curop.debug().exhaust ? nss.ns() : ""; }
/**
 * Called by db/instance.cpp. This is the getMore entry point.
 *
 * pass - when QueryOption_AwaitData is in use, the caller will make repeated calls
 *        when this method returns an empty result, incrementing pass on each call.
 *        Thus, pass == 0 indicates this is the first "attempt" before any 'awaiting'.
 *
 * FIX: the ScopedRecoveryUnitSwapper holder used std::auto_ptr, which is deprecated
 * (and removed in C++17); replaced with std::unique_ptr. reset()/dismiss() usage is
 * unchanged, so there is no behavior change.
 */
QueryResult::View getMore(OperationContext* txn,
                          const char* ns,
                          int ntoreturn,
                          long long cursorid,
                          CurOp& curop,
                          int pass,
                          bool& exhaust,
                          bool* isCursorAuthorized) {
    // For testing, we may want to fail if we receive a getmore.
    if (MONGO_FAIL_POINT(failReceivedGetmore)) {
        invariant(0);
    }

    exhaust = false;

    const NamespaceString nss(ns);

    // Depending on the type of cursor being operated on, we hold locks for the whole getMore,
    // or none of the getMore, or part of the getMore. The three cases in detail:
    //
    // 1) Normal cursor: we lock with "ctx" and hold it for the whole getMore.
    // 2) Cursor owned by global cursor manager: we don't lock anything. These cursors don't
    //    own any collection state.
    // 3) Agg cursor: we lock with "ctx", then release, then relock with "unpinDBLock" and
    //    "unpinCollLock". This is because agg cursors handle locking internally (hence the
    //    release), but the pin and unpin of the cursor must occur under the collection lock.
    //    We don't use our AutoGetCollectionForRead "ctx" to relock, because
    //    AutoGetCollectionForRead checks the sharding version (and we want the relock for the
    //    unpin to succeed even if the sharding version has changed).
    //
    // Note that we declare our locks before our ClientCursorPin, in order to ensure that the
    // pin's destructor is called before the lock destructors (so that the unpin occurs under
    // the lock).
    boost::scoped_ptr<AutoGetCollectionForRead> ctx;
    boost::scoped_ptr<Lock::DBLock> unpinDBLock;
    boost::scoped_ptr<Lock::CollectionLock> unpinCollLock;

    CursorManager* cursorManager;
    CursorManager* globalCursorManager = CursorManager::getGlobalCursorManager();
    if (globalCursorManager->ownsCursorId(cursorid)) {
        cursorManager = globalCursorManager;
    } else {
        ctx.reset(new AutoGetCollectionForRead(txn, nss));
        Collection* collection = ctx->getCollection();
        uassert(17356, "collection dropped between getMore calls", collection);
        cursorManager = collection->getCursorManager();
    }

    LOG(5) << "Running getMore, cursorid: " << cursorid << endl;

    // This checks to make sure the operation is allowed on a replicated node. Since we are not
    // passing in a query object (necessary to check SlaveOK query option), the only state where
    // reads are allowed is PRIMARY (or master in master/slave). This function uasserts if
    // reads are not okay.
    Status status =
        repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(txn, nss, true);
    uassertStatusOK(status);

    // A pin performs a CC lookup and if there is a CC, increments the CC's pin value so it
    // doesn't time out. Also informs ClientCursor that there is somebody actively holding the
    // CC, so don't delete it.
    ClientCursorPin ccPin(cursorManager, cursorid);
    ClientCursor* cc = ccPin.c();

    // If we're not being called from DBDirectClient we want to associate the RecoveryUnit
    // used to create the execution machinery inside the cursor with our OperationContext.
    // If we throw or otherwise exit this method in a disorderly fashion, we must ensure
    // that further calls to getMore won't fail, and that the provided OperationContext
    // has a valid RecoveryUnit. As such, we use RAII to accomplish this.
    //
    // This must be destroyed before the ClientCursor is destroyed.
    std::unique_ptr<ScopedRecoveryUnitSwapper> ruSwapper;

    // These are set in the QueryResult msg we return.
    int resultFlags = ResultFlag_AwaitCapable;

    int numResults = 0;
    int startingResult = 0;

    const int InitialBufSize = 512 + sizeof(QueryResult::Value) + MaxBytesToReturnToClientAtOnce;

    BufBuilder bb(InitialBufSize);
    bb.skip(sizeof(QueryResult::Value));

    if (NULL == cc) {
        cursorid = 0;
        resultFlags = ResultFlag_CursorNotFound;
    } else {
        // Check for spoofing of the ns such that it does not match the one originally
        // there for the cursor.
        uassert(ErrorCodes::Unauthorized,
                str::stream() << "Requested getMore on namespace " << ns << ", but cursor "
                              << cursorid << " belongs to namespace " << cc->ns(),
                ns == cc->ns());
        *isCursorAuthorized = true;

        // Restore the RecoveryUnit if we need to.
        if (txn->getClient()->isInDirectClient()) {
            if (cc->hasRecoveryUnit())
                invariant(txn->recoveryUnit() == cc->getUnownedRecoveryUnit());
        } else {
            if (!cc->hasRecoveryUnit()) {
                // Start using a new RecoveryUnit
                cc->setOwnedRecoveryUnit(
                    getGlobalServiceContext()->getGlobalStorageEngine()->newRecoveryUnit());
            }
            // Swap RecoveryUnit(s) between the ClientCursor and OperationContext.
            ruSwapper.reset(new ScopedRecoveryUnitSwapper(cc, txn));
        }

        // Reset timeout timer on the cursor since the cursor is still in use.
        cc->setIdleTime(0);

        // If the operation that spawned this cursor had a time limit set, apply leftover
        // time to this getmore.
        curop.setMaxTimeMicros(cc->getLeftoverMaxTimeMicros());
        txn->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.

        if (0 == pass) {
            cc->updateSlaveLocation(txn);
        }

        if (cc->isAggCursor()) {
            // Agg cursors handle their own locking internally.
            ctx.reset();  // unlocks
        }

        // If we're replaying the oplog, we save the last time that we read.
        Timestamp slaveReadTill;

        // What number result are we starting at?  Used to fill out the reply.
        startingResult = cc->pos();

        // What gives us results.
        PlanExecutor* exec = cc->getExecutor();
        const int queryOptions = cc->queryOptions();

        // Get results out of the executor.
        exec->restoreState(txn);

        BSONObj obj;
        PlanExecutor::ExecState state;
        while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
            // Add result to output buffer.
            bb.appendBuf((void*)obj.objdata(), obj.objsize());

            // Count the result.
            ++numResults;

            // Possibly note slave's position in the oplog.
            if (queryOptions & QueryOption_OplogReplay) {
                BSONElement e = obj["ts"];
                if (Date == e.type() || bsonTimestamp == e.type()) {
                    slaveReadTill = e.timestamp();
                }
            }

            if (enoughForGetMore(ntoreturn, numResults, bb.len())) {
                break;
            }
        }

        if (PlanExecutor::DEAD == state || PlanExecutor::FAILURE == state) {
            // Propagate this error to caller.
            if (PlanExecutor::FAILURE == state) {
                scoped_ptr<PlanStageStats> stats(exec->getStats());
                error() << "Plan executor error, stats: " << Explain::statsToBSON(*stats);
                uasserted(17406,
                          "getMore executor error: " + WorkingSetCommon::toStatusString(obj));
            }

            // In the old system tailable capped cursors would be killed off at the
            // cursorid level.  If a tailable capped cursor is nuked the cursorid
            // would vanish.
            //
            // In the new system they die and are cleaned up later (or time out).
            // So this is where we get to remove the cursorid.
            if (0 == numResults) {
                resultFlags = ResultFlag_CursorNotFound;
            }
        }

        const bool shouldSaveCursor = shouldSaveCursorGetMore(state, exec, isCursorTailable(cc));

        // In order to deregister a cursor, we need to be holding the DB + collection lock and
        // if the cursor is aggregation, we release these locks.
        if (cc->isAggCursor()) {
            invariant(NULL == ctx.get());
            unpinDBLock.reset(new Lock::DBLock(txn->lockState(), nss.db(), MODE_IS));
            unpinCollLock.reset(new Lock::CollectionLock(txn->lockState(), nss.ns(), MODE_IS));
        }

        // Our two possible ClientCursorPin cleanup paths are:
        // 1) If the cursor is not going to be saved, we call deleteUnderlying() on the pin.
        // 2) If the cursor is going to be saved, we simply let the pin go out of scope.  In
        //    this case, the pin's destructor will be invoked, which will call release() on the
        //    pin.  Because our ClientCursorPin is declared after our lock is declared, this
        //    will happen under the lock.
        if (!shouldSaveCursor) {
            ruSwapper.reset();
            ccPin.deleteUnderlying();

            // cc is now invalid, as is the executor
            cursorid = 0;
            cc = NULL;
            curop.debug().cursorExhausted = true;

            LOG(5) << "getMore NOT saving client cursor, ended with state "
                   << PlanExecutor::statestr(state) << endl;
        } else {
            // Continue caching the ClientCursor.
            cc->incPos(numResults);
            exec->saveState();
            LOG(5) << "getMore saving client cursor ended with state "
                   << PlanExecutor::statestr(state) << endl;

            if (PlanExecutor::IS_EOF == state && (queryOptions & QueryOption_CursorTailable)) {
                if (!txn->getClient()->isInDirectClient()) {
                    // Don't stash the RU. Get a new one on the next getMore.
                    ruSwapper->dismiss();
                }

                if ((queryOptions & QueryOption_AwaitData) && (numResults == 0) &&
                    (pass < 1000)) {
                    // Bubble up to the AwaitData handling code in receivedGetMore which will
                    // try again.
                    return NULL;
                }
            }

            // Possibly note slave's position in the oplog.
            if ((queryOptions & QueryOption_OplogReplay) && !slaveReadTill.isNull()) {
                cc->slaveReadTill(slaveReadTill);
            }

            exhaust = (queryOptions & QueryOption_Exhaust);

            // If the getmore had a time limit, remaining time is "rolled over" back to the
            // cursor (for use by future getmore ops).
            cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());
        }
    }

    QueryResult::View qr = bb.buf();
    qr.msgdata().setLen(bb.len());
    qr.msgdata().setOperation(opReply);
    qr.setResultFlags(resultFlags);
    qr.setCursorId(cursorid);
    qr.setStartingFrom(startingResult);
    qr.setNReturned(numResults);
    bb.decouple();
    LOG(5) << "getMore returned " << numResults << " results\n";
    return qr;
}
// Mixes the value of 'elemToHash' (and optionally its field name) into 'hash'.
// The hash starts from the element's canonical type so that values that compare
// equal across numeric types can still land in the same bucket, and each case
// below hashes the value representation used by compareElementValues().
// 'stringComparator' (may be null) controls how String values are hashed.
void BSONComparatorInterfaceBase<T>::hashCombineBSONElement(
    size_t& hash,
    BSONElement elemToHash,
    bool considerFieldName,
    const StringData::ComparatorInterface* stringComparator) {
    boost::hash_combine(hash, elemToHash.canonicalType());

    const StringData fieldName = elemToHash.fieldNameStringData();
    if (considerFieldName && !fieldName.empty()) {
        SimpleStringDataComparator::kInstance.hash_combine(hash, fieldName);
    }

    switch (elemToHash.type()) {
        // Order of types is the same as in compareElementValues().
        case mongo::EOO:
        case mongo::Undefined:
        case mongo::jstNULL:
        case mongo::MaxKey:
        case mongo::MinKey:
            // These are valueless types
            break;

        case mongo::Bool:
            boost::hash_combine(hash, elemToHash.boolean());
            break;

        case mongo::bsonTimestamp:
            boost::hash_combine(hash, elemToHash.timestamp().asULL());
            break;

        case mongo::Date:
            boost::hash_combine(hash, elemToHash.date().asInt64());
            break;

        case mongo::NumberDecimal: {
            const Decimal128 dcml = elemToHash.numberDecimal();
            // Only decimals outside the finite double range (and not inf/NaN) get the
            // normalized-decimal hash; everything else falls through to the double path
            // below so that numerically-equal values hash identically across types.
            if (dcml.toAbs().isGreater(Decimal128(std::numeric_limits<double>::max(),
                                                  Decimal128::kRoundTo34Digits,
                                                  Decimal128::kRoundTowardZero)) &&
                !dcml.isInfinite() && !dcml.isNaN()) {
                // Normalize our decimal to force equivalent decimals
                // in the same cohort to hash to the same value
                Decimal128 dcmlNorm(dcml.normalize());
                boost::hash_combine(hash, dcmlNorm.getValue().low64);
                boost::hash_combine(hash, dcmlNorm.getValue().high64);
                break;
            }
            // Else, fall through and convert the decimal to a double and hash.
            // At this point the decimal fits into the range of doubles, is infinity, or is NaN,
            // which doubles have a cheaper representation for.
        }
        case mongo::NumberDouble:
        case mongo::NumberLong:
        case mongo::NumberInt: {
            // This converts all numbers to doubles, which ignores the low-order bits of
            // NumberLongs > 2**53 and precise decimal numbers without double representations,
            // but that is ok since the hash will still be the same for equal numbers and is
            // still likely to be different for different numbers. (Note: this issue only
            // applies for decimals when they are outside of the valid double range. See
            // the above case.)
            // SERVER-16851
            const double dbl = elemToHash.numberDouble();
            if (std::isnan(dbl)) {
                // Collapse every NaN payload to a single canonical NaN before hashing.
                boost::hash_combine(hash, std::numeric_limits<double>::quiet_NaN());
            } else {
                boost::hash_combine(hash, dbl);
            }
            break;
        }

        case mongo::jstOID:
            elemToHash.__oid().hash_combine(hash);
            break;

        case mongo::String: {
            if (stringComparator) {
                stringComparator->hash_combine(hash, elemToHash.valueStringData());
            } else {
                SimpleStringDataComparator::kInstance.hash_combine(hash,
                                                                   elemToHash.valueStringData());
            }
            break;
        }

        case mongo::Code:
        case mongo::Symbol:
            // Code/Symbol are always hashed byte-wise, never via 'stringComparator'.
            SimpleStringDataComparator::kInstance.hash_combine(hash,
                                                               elemToHash.valueStringData());
            break;

        case mongo::Object:
        case mongo::Array:
            hashCombineBSONObj(hash,
                               elemToHash.embeddedObject(),
                               true,  // considerFieldName
                               stringComparator);
            break;

        case mongo::DBRef:
        case mongo::BinData:
            // All bytes of the value are required to be identical.
            SimpleStringDataComparator::kInstance.hash_combine(
                hash, StringData(elemToHash.value(), elemToHash.valuesize()));
            break;

        case mongo::RegEx:
            SimpleStringDataComparator::kInstance.hash_combine(hash, elemToHash.regex());
            SimpleStringDataComparator::kInstance.hash_combine(hash, elemToHash.regexFlags());
            break;

        case mongo::CodeWScope: {
            // Hash both the code string and its scope object.
            SimpleStringDataComparator::kInstance.hash_combine(
                hash, StringData(elemToHash.codeWScopeCode(), elemToHash.codeWScopeCodeLen()));
            hashCombineBSONObj(hash,
                               elemToHash.codeWScopeObject(),
                               true,  // considerFieldName
                               &SimpleStringDataComparator::kInstance);
            break;
        }
    }
}
/**
 * Initializes this ReplSetHeartbeatResponse from the BSON reply 'doc' to a
 * replSetHeartbeat command. 'term' supplies the term for legacy (v0) optime
 * encodings, which carry no term of their own.
 *
 * Accepts both 3.0-style (v0: Timestamp/Date optimes) and 3.2-style (v1: Object
 * optimes with a term) responses. Returns a non-OK Status describing the first
 * malformed field encountered.
 *
 * FIX: the Status returned by bsonExtractOpTimeField for an Object-typed opTime
 * was previously ignored; every other extraction in this function propagates
 * failures, so a malformed v1 opTime now returns its error instead of being
 * silently treated as parsed.
 */
Status ReplSetHeartbeatResponse::initialize(const BSONObj& doc, long long term) {
    // Old versions set this even though they returned not "ok"
    _mismatch = doc[kMismatchFieldName].trueValue();
    if (_mismatch)
        return Status(ErrorCodes::InconsistentReplicaSetNames, "replica set name doesn't match.");

    // Old versions sometimes set the replica set name ("set") but ok:0
    const BSONElement replSetNameElement = doc[kReplSetFieldName];
    if (replSetNameElement.eoo()) {
        _setName.clear();
    } else if (replSetNameElement.type() != String) {
        return Status(ErrorCodes::TypeMismatch,
                      str::stream() << "Expected \"" << kReplSetFieldName
                                    << "\" field in response to replSetHeartbeat to have "
                                       "type String, but found "
                                    << typeName(replSetNameElement.type()));
    } else {
        _setName = replSetNameElement.String();
    }

    // No set name and not ok: surface the remote error (with its code when present).
    if (_setName.empty() && !doc[kOkFieldName].trueValue()) {
        std::string errMsg = doc[kErrMsgFieldName].str();

        BSONElement errCodeElem = doc[kErrorCodeFieldName];
        if (errCodeElem.ok()) {
            if (!errCodeElem.isNumber())
                return Status(ErrorCodes::BadValue, "Error code is not a number!");

            int errorCode = errCodeElem.numberInt();
            return Status(ErrorCodes::Error(errorCode), errMsg);
        }
        return Status(ErrorCodes::UnknownError, errMsg);
    }

    const BSONElement hasDataElement = doc[kHasDataFieldName];
    _hasDataSet = !hasDataElement.eoo();
    _hasData = hasDataElement.trueValue();

    // Election time may arrive as a Timestamp (current) or a Date (legacy senders).
    const BSONElement electionTimeElement = doc[kElectionTimeFieldName];
    if (electionTimeElement.eoo()) {
        _electionTimeSet = false;
    } else if (electionTimeElement.type() == bsonTimestamp) {
        _electionTimeSet = true;
        _electionTime = electionTimeElement.timestamp();
    } else if (electionTimeElement.type() == Date) {
        _electionTimeSet = true;
        _electionTime = Timestamp(electionTimeElement.date());
    } else {
        return Status(ErrorCodes::TypeMismatch,
                      str::stream() << "Expected \"" << kElectionTimeFieldName
                                    << "\" field in response to replSetHeartbeat "
                                       "command to have type Date or Timestamp, but found type "
                                    << typeName(electionTimeElement.type()));
    }

    const BSONElement timeElement = doc[kTimeFieldName];
    if (timeElement.eoo()) {
        _timeSet = false;
    } else if (timeElement.isNumber()) {
        _timeSet = true;
        _time = Seconds(timeElement.numberLong());
    } else {
        return Status(ErrorCodes::TypeMismatch,
                      str::stream() << "Expected \"" << kTimeFieldName
                                    << "\" field in response to replSetHeartbeat "
                                       "command to have a numeric type, but found type "
                                    << typeName(timeElement.type()));
    }

    _isReplSet = doc[kIsReplSetFieldName].trueValue();

    // A missing term is tolerated (v0 responses); any other extraction error is fatal.
    Status termStatus = bsonExtractIntegerField(doc, kTermFieldName, &_term);
    if (!termStatus.isOK() && termStatus != ErrorCodes::NoSuchKey) {
        return termStatus;
    }

    // In order to support both the 3.0(V0) and 3.2(V1) heartbeats we must parse the OpTime
    // field based on its type. If it is a Date, we parse it as the timestamp and use
    // initialize's term argument to complete the OpTime type. If it is an Object, then it's
    // V1 and we construct an OpTime out of its nested fields.
    const BSONElement opTimeElement = doc[kOpTimeFieldName];
    if (opTimeElement.eoo()) {
        _opTimeSet = false;
    } else if (opTimeElement.type() == bsonTimestamp) {
        _opTimeSet = true;
        _opTime = OpTime(opTimeElement.timestamp(), term);
    } else if (opTimeElement.type() == Date) {
        _opTimeSet = true;
        _opTime = OpTime(Timestamp(opTimeElement.date()), term);
    } else if (opTimeElement.type() == Object) {
        Status status = bsonExtractOpTimeField(doc, kOpTimeFieldName, &_opTime);
        if (!status.isOK()) {
            return status;
        }
        _opTimeSet = true;
        // since a v1 OpTime was in the response, the member must be part of a replset
        _isReplSet = true;
    } else {
        return Status(ErrorCodes::TypeMismatch,
                      str::stream() << "Expected \"" << kOpTimeFieldName
                                    << "\" field in response to replSetHeartbeat "
                                       "command to have type Date or Timestamp, but found type "
                                    << typeName(opTimeElement.type()));
    }

    const BSONElement electableElement = doc[kIsElectableFieldName];
    if (electableElement.eoo()) {
        _electableSet = false;
    } else {
        _electableSet = true;
        _electable = electableElement.trueValue();
    }

    const BSONElement memberStateElement = doc[kMemberStateFieldName];
    if (memberStateElement.eoo()) {
        _stateSet = false;
    } else if (memberStateElement.type() != NumberInt &&
               memberStateElement.type() != NumberLong) {
        return Status(ErrorCodes::TypeMismatch,
                      str::stream()
                          << "Expected \"" << kMemberStateFieldName
                          << "\" field in response to replSetHeartbeat "
                             "command to have type NumberInt or NumberLong, but found type "
                          << typeName(memberStateElement.type()));
    } else {
        long long stateInt = memberStateElement.numberLong();
        if (stateInt < 0 || stateInt > MemberState::RS_MAX) {
            return Status(ErrorCodes::BadValue,
                          str::stream()
                              << "Value for \"" << kMemberStateFieldName
                              << "\" in response to replSetHeartbeat is "
                                 "out of range; legal values are non-negative and no more than "
                              << MemberState::RS_MAX);
        }
        _stateSet = true;
        _state = MemberState(static_cast<int>(stateInt));
    }

    _stateDisagreement = doc[kHasStateDisagreementFieldName].trueValue();

    // Not required for the case of uninitialized members -- they have no config
    const BSONElement configVersionElement = doc[kConfigVersionFieldName];

    // If we have an optime then we must have a configVersion
    if (_opTimeSet && configVersionElement.eoo()) {
        return Status(ErrorCodes::NoSuchKey,
                      str::stream() << "Response to replSetHeartbeat missing required \""
                                    << kConfigVersionFieldName
                                    << "\" field even though initialized");
    }

    // If there is a "v" (config version) then it must be an int.
    if (!configVersionElement.eoo() && configVersionElement.type() != NumberInt) {
        return Status(ErrorCodes::TypeMismatch,
                      str::stream() << "Expected \"" << kConfigVersionFieldName
                                    << "\" field in response to replSetHeartbeat to have "
                                       "type NumberInt, but found "
                                    << typeName(configVersionElement.type()));
    }
    _configVersion = configVersionElement.numberInt();

    const BSONElement hbMsgElement = doc[kHbMessageFieldName];
    if (hbMsgElement.eoo()) {
        _hbmsg.clear();
    } else if (hbMsgElement.type() != String) {
        return Status(ErrorCodes::TypeMismatch,
                      str::stream() << "Expected \"" << kHbMessageFieldName
                                    << "\" field in response to replSetHeartbeat to have "
                                       "type String, but found "
                                    << typeName(hbMsgElement.type()));
    } else {
        _hbmsg = hbMsgElement.String();
    }

    const BSONElement syncingToElement = doc[kSyncSourceFieldName];
    if (syncingToElement.eoo()) {
        _syncingTo = HostAndPort();
    } else if (syncingToElement.type() != String) {
        return Status(ErrorCodes::TypeMismatch,
                      str::stream() << "Expected \"" << kSyncSourceFieldName
                                    << "\" field in response to replSetHeartbeat to "
                                       "have type String, but found "
                                    << typeName(syncingToElement.type()));
    } else {
        _syncingTo = HostAndPort(syncingToElement.String());
    }

    const BSONElement rsConfigElement = doc[kConfigFieldName];
    if (rsConfigElement.eoo()) {
        _configSet = false;
        _config = ReplicaSetConfig();
        return Status::OK();
    } else if (rsConfigElement.type() != Object) {
        return Status(ErrorCodes::TypeMismatch,
                      str::stream() << "Expected \"" << kConfigFieldName
                                    << "\" in response to replSetHeartbeat to have type "
                                       "Object, but found "
                                    << typeName(rsConfigElement.type()));
    }
    _configSet = true;

    return _config.initialize(rsConfigElement.Obj());
}
/**
 * Parses "source", a batched write command reply document, into this response
 * object.
 *
 * Clears any existing state first, then extracts each known reply field
 * ("ok", error code, error message, n, nModified, upserted details, opTime,
 * electionId, writeErrors, writeConcernError), recording which optional
 * fields were present via the corresponding _is*Set flags.
 *
 * Returns true on success. Returns false when any field fails to parse; in
 * that case a description of the problem is written to *errMsg (errMsg may be
 * NULL, in which case the message is discarded).
 */
bool BatchedCommandResponse::parseBSON(const BSONObj& source, string* errMsg) {
    // Reset all fields so a partially parsed document never leaves stale state
    // from a previous parse behind.
    clear();

    // Callers may pass errMsg == NULL; point it at a scratch string so the
    // FieldParser calls below always have somewhere to write an error.
    std::string dummy;
    if (!errMsg)
        errMsg = &dummy;

    FieldParser::FieldState fieldState;
    fieldState = FieldParser::extractNumber(source, ok, &_ok, errMsg);
    if (fieldState == FieldParser::FIELD_INVALID)
        return false;
    _isOkSet = fieldState == FieldParser::FIELD_SET;

    fieldState = FieldParser::extract(source, errCode, &_errCode, errMsg);
    if (fieldState == FieldParser::FIELD_INVALID)
        return false;
    _isErrCodeSet = fieldState == FieldParser::FIELD_SET;

    fieldState = FieldParser::extract(source, errMessage, &_errMessage, errMsg);
    if (fieldState == FieldParser::FIELD_INVALID)
        return false;
    _isErrMessageSet = fieldState == FieldParser::FIELD_SET;

    // We're using appendNumber on generation so we'll try a smaller type
    // (int) first and then fall back to the original type (long long).
    BSONField<int> fieldN(n());
    int tempN;
    fieldState = FieldParser::extract(source, fieldN, &tempN, errMsg);
    if (fieldState == FieldParser::FIELD_INVALID) {
        // try falling back to a larger type
        fieldState = FieldParser::extract(source, n, &_n, errMsg);
        if (fieldState == FieldParser::FIELD_INVALID)
            return false;
        _isNSet = fieldState == FieldParser::FIELD_SET;
    } else if (fieldState == FieldParser::FIELD_SET) {
        _isNSet = true;
        _n = tempN;
    }
    // NOTE(review): when the field is absent entirely, _isNSet keeps the value
    // established by clear() above — presumably false; confirm in clear().

    // We're using appendNumber on generation so we'll try a smaller type
    // (int) first and then fall back to the original type (long long).
    BSONField<int> fieldNModified(nModified());
    int intNModified;
    fieldState = FieldParser::extract(source, fieldNModified, &intNModified, errMsg);
    if (fieldState == FieldParser::FIELD_INVALID) {
        // try falling back to a larger type
        fieldState = FieldParser::extract(source, nModified, &_nModified, errMsg);
        if (fieldState == FieldParser::FIELD_INVALID)
            return false;
        _isNModifiedSet = fieldState == FieldParser::FIELD_SET;
    } else if (fieldState == FieldParser::FIELD_SET) {
        _isNModifiedSet = true;
        _nModified = intNModified;
    }

    // Ownership of the extracted vector transfers to _upsertDetails.
    std::vector<BatchedUpsertDetail*>* tempUpsertDetails = NULL;
    fieldState = FieldParser::extract(source, upsertDetails, &tempUpsertDetails, errMsg);
    if (fieldState == FieldParser::FIELD_INVALID)
        return false;
    _upsertDetails.reset(tempUpsertDetails);

    // "opTime" historically appears in several shapes: a bare Timestamp, a
    // Date (legacy), or an Object carrying { ts, t }. Accept all three; any
    // other type is a parse failure.
    const BSONElement opTimeElement = source["opTime"];
    _isLastOpSet = true;
    if (opTimeElement.eoo()) {
        _isLastOpSet = false;
    } else if (opTimeElement.type() == bsonTimestamp) {
        _lastOp = repl::OpTime(opTimeElement.timestamp(), repl::OpTime::kUninitializedTerm);
    } else if (opTimeElement.type() == Date) {
        _lastOp = repl::OpTime(Timestamp(opTimeElement.date()), repl::OpTime::kUninitializedTerm);
    } else if (opTimeElement.type() == Object) {
        Status status = bsonExtractOpTimeField(source, "opTime", &_lastOp);
        if (!status.isOK()) {
            return false;
        }
    } else {
        return false;
    }

    fieldState = FieldParser::extract(source, electionId, &_electionId, errMsg);
    if (fieldState == FieldParser::FIELD_INVALID)
        return false;
    _isElectionIdSet = fieldState == FieldParser::FIELD_SET;

    // Ownership of the extracted vector transfers to _writeErrorDetails.
    std::vector<WriteErrorDetail*>* tempErrDetails = NULL;
    fieldState = FieldParser::extract(source, writeErrors, &tempErrDetails, errMsg);
    if (fieldState == FieldParser::FIELD_INVALID)
        return false;
    _writeErrorDetails.reset(tempErrDetails);

    // Ownership of the extracted detail transfers to _wcErrDetails.
    WCErrorDetail* wcError = NULL;
    fieldState = FieldParser::extract(source, writeConcernError, &wcError, errMsg);
    if (fieldState == FieldParser::FIELD_INVALID)
        return false;
    _wcErrDetails.reset(wcError);

    return true;
}
/**
 * Executes the OP_QUERY-style query "q" against namespace "nss" and writes the
 * reply (either result documents or explain output) into "result".
 *
 * Canonicalizes the query, obtains a plan executor, and either runs an explain
 * (returning immediately) or streams documents into the reply buffer until the
 * first batch is full. When more results remain, creates and caches a
 * ClientCursor so subsequent getMore operations can resume the executor.
 *
 * Returns nss.ns() when the query requested exhaust mode, otherwise "".
 * Throws (via uassert) on an invalid namespace, canonicalization failure,
 * executor error, shard-version mismatch, or a read this node cannot serve.
 */
std::string runQuery(OperationContext* txn,
                     QueryMessage& q,
                     const NamespaceString& nss,
                     Message& result) {
    CurOp& curop = *CurOp::get(txn);

    uassert(ErrorCodes::InvalidNamespace,
            str::stream() << "Invalid ns [" << nss.ns() << "]",
            nss.isValid());
    invariant(!nss.isCommand());

    // Set curop information.
    beginQueryOp(txn, nss, q.query, q.ntoreturn, q.ntoskip);

    // Parse the qm into a CanonicalQuery.
    auto statusWithCQ = CanonicalQuery::canonicalize(q, ExtensionsCallbackReal(txn, &nss));
    if (!statusWithCQ.isOK()) {
        uasserted(
            17287,
            str::stream() << "Can't canonicalize query: " << statusWithCQ.getStatus().toString());
    }
    unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());
    invariant(cq.get());

    LOG(5) << "Running query:\n" << cq->toString();
    LOG(2) << "Running query: " << cq->toStringShort();

    // Parse, canonicalize, plan, transcribe, and get a plan executor.
    // AutoGetCollectionForRead holds the collection lock for the remainder of
    // this function.
    AutoGetCollectionForRead ctx(txn, nss);
    Collection* collection = ctx.getCollection();

    // The database may not exist; fall back to the server-wide default profile level.
    const int dbProfilingLevel =
        ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile;

    // We have a parsed query. Time to get the execution plan for it.
    std::unique_ptr<PlanExecutor> exec = uassertStatusOK(
        getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO));

    const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();

    // If it's actually an explain, do the explain and return rather than falling through
    // to the normal query execution loop.
    if (pq.isExplain()) {
        BufBuilder bb;
        // Reserve room for the reply header, which is filled in below.
        bb.skip(sizeof(QueryResult::Value));

        BSONObjBuilder explainBob;
        Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob);

        // Add the resulting object to the return buffer.
        BSONObj explainObj = explainBob.obj();
        bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

        // TODO: Does this get overwritten/do we really need to set this twice?
        curop.debug().query = q.query;

        // Set query result fields.
        QueryResult::View qr = bb.buf();
        // Release the buffer from bb; setData(..., true) below takes ownership
        // of the memory (presumably freeing it with the Message — confirm).
        bb.decouple();
        qr.setResultFlagsToOk();
        qr.msgdata().setLen(bb.len());
        curop.debug().responseLength = bb.len();
        qr.msgdata().setOperation(opReply);
        qr.setCursorId(0);
        qr.setStartingFrom(0);
        qr.setNReturned(1);
        result.setData(qr.view2ptr(), true);
        return "";
    }

    // Handle query option $maxTimeMS (not used with commands).
    curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
    txn->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.

    // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
    bool slaveOK = pq.isSlaveOk() || pq.hasReadPref();
    Status serveReadsStatus =
        repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(txn, nss, slaveOK);
    uassertStatusOK(serveReadsStatus);

    // Run the query.
    // bb is used to hold query results
    // this buffer should contain either requested documents per query or
    // explain information, but not both
    BufBuilder bb(FindCommon::kInitReplyBufferSize);
    bb.skip(sizeof(QueryResult::Value));

    // How many results have we obtained from the executor?
    int numResults = 0;

    // If we're replaying the oplog, we save the last time that we read.
    Timestamp slaveReadTill;

    BSONObj obj;
    PlanExecutor::ExecState state;

    // Get summary info about which plan the executor is using.
    {
        stdx::lock_guard<Client> lk(*txn->getClient());
        curop.setPlanSummary_inlock(Explain::getPlanSummary(exec.get()));
    }

    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
        // If we can't fit this result inside the current batch, then we stash it for later.
        if (!FindCommon::haveSpaceForNext(obj, numResults, bb.len())) {
            exec->enqueue(obj);
            break;
        }

        // Add result to output buffer.
        bb.appendBuf((void*)obj.objdata(), obj.objsize());

        // Count the result.
        ++numResults;

        // Possibly note slave's position in the oplog.
        if (pq.isOplogReplay()) {
            BSONElement e = obj["ts"];
            if (Date == e.type() || bsonTimestamp == e.type()) {
                slaveReadTill = e.timestamp();
            }
        }

        if (FindCommon::enoughForFirstBatch(pq, numResults)) {
            LOG(5) << "Enough for first batch, wantMore=" << pq.wantMore()
                   << " ntoreturn=" << pq.getNToReturn().value_or(0)
                   << " numResults=" << numResults << endl;
            break;
        }
    }

    // If we cache the executor later, we want to deregister it as it receives notifications
    // anyway by virtue of being cached.
    //
    // If we don't cache the executor later, we are deleting it, so it must be deregistered.
    //
    // So, no matter what, deregister the executor.
    exec->deregisterExec();

    // Caller expects exceptions thrown in certain cases.
    if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
        error() << "Plan executor error during find: " << PlanExecutor::statestr(state)
                << ", stats: " << Explain::getWinningPlanStats(exec.get());

        uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj));
    }

    // Before saving the cursor, ensure that whatever plan we established happened with the expected
    // collection version
    auto css = CollectionShardingState::get(txn, nss);
    css->checkShardVersionOrThrow(txn);

    // Fill out curop based on query results. If we have a cursorid, we will fill out curop with
    // this cursorid later.
    long long ccId = 0;

    if (shouldSaveCursor(txn, collection, state, exec.get())) {
        // We won't use the executor until it's getMore'd.
        exec->saveState();
        exec->detachFromOperationContext();

        // Allocate a new ClientCursor. We don't have to worry about leaking it as it's
        // inserted into a global map by its ctor.
        ClientCursor* cc =
            new ClientCursor(collection->getCursorManager(),
                             exec.release(),
                             nss.ns(),
                             txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                             pq.getOptions(),
                             pq.getFilter());
        ccId = cc->cursorid();

        LOG(5) << "caching executor with cursorid " << ccId << " after returning " << numResults
               << " results" << endl;

        // TODO document
        if (pq.isOplogReplay() && !slaveReadTill.isNull()) {
            cc->slaveReadTill(slaveReadTill);
        }

        // TODO document
        if (pq.isExhaust()) {
            curop.debug().exhaust = true;
        }

        cc->setPos(numResults);

        // If the query had a time limit, remaining time is "rolled over" to the cursor (for
        // use by future getmore ops).
        cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());

        endQueryOp(txn, collection, *cc->getExecutor(), dbProfilingLevel, numResults, ccId);
    } else {
        LOG(5) << "Not caching executor but returning " << numResults << " results.\n";
        endQueryOp(txn, collection, *exec, dbProfilingLevel, numResults, ccId);
    }

    // Add the results from the query into the output buffer.
    result.appendData(bb.buf(), bb.len());
    bb.decouple();

    // Fill out the output buffer's header.
    QueryResult::View qr = result.header().view2ptr();
    qr.setCursorId(ccId);
    qr.setResultFlagsToOk();
    qr.msgdata().setOperation(opReply);
    qr.setStartingFrom(0);
    qr.setNReturned(numResults);

    // curop.debug().exhaust is set above.
    return curop.debug().exhaust ? nss.ns() : "";
}
/**
 * Three-way comparison of two BSONElements' values, ignoring their field
 * names. Returns a negative value when l < r, 0 when equal, and a positive
 * value when l > r.
 *
 * Dispatches on l's type. For the numeric types every (l, r) numeric pairing
 * is handled explicitly; for all other types the code reads r assuming it has
 * the same (canonical) type as l — presumably the caller guarantees this, as
 * the default switch arms are MONGO_UNREACHABLE. TODO confirm that
 * precondition at the call sites.
 *
 * "rules" is forwarded (with kConsiderFieldName added) when recursing into
 * Object/Array/CodeWScope-scope contents. "comparator", when non-null,
 * overrides the byte-wise comparison of String/Symbol values; it is
 * deliberately NOT forwarded into CodeWScope scope objects (see comment
 * below).
 */
int BSONElement::compareElements(const BSONElement& l,
                                 const BSONElement& r,
                                 ComparisonRulesSet rules,
                                 const StringData::ComparatorInterface* comparator) {
    switch (l.type()) {
        case BSONType::EOO:
        case BSONType::Undefined:  // EOO and Undefined are same canonicalType
        case BSONType::jstNULL:
        case BSONType::MaxKey:
        case BSONType::MinKey: {
            // These types carry no value payload; order purely by canonical type.
            auto f = l.canonicalType() - r.canonicalType();
            if (f < 0)
                return -1;
            return f == 0 ? 0 : 1;
        }
        case BSONType::Bool:
            // Bools are stored as a single byte; subtract the raw bytes.
            return *l.value() - *r.value();
        case BSONType::bsonTimestamp:
            // unsigned compare for timestamps - note they are not really dates but (ordinal +
            // time_t)
            if (l.timestamp() < r.timestamp())
                return -1;
            return l.timestamp() == r.timestamp() ? 0 : 1;
        case BSONType::Date:
            // Signed comparisons for Dates.
            {
                const Date_t a = l.Date();
                const Date_t b = r.Date();
                if (a < b)
                    return -1;
                return a == b ? 0 : 1;
            }
        case BSONType::NumberInt: {
            // All types can precisely represent all NumberInts, so it is safe to simply convert to
            // whatever rhs's type is.
            switch (r.type()) {
                case NumberInt:
                    return compareInts(l._numberInt(), r._numberInt());
                case NumberLong:
                    return compareLongs(l._numberInt(), r._numberLong());
                case NumberDouble:
                    return compareDoubles(l._numberInt(), r._numberDouble());
                case NumberDecimal:
                    return compareIntToDecimal(l._numberInt(), r._numberDecimal());
                default:
                    MONGO_UNREACHABLE;
            }
        }
        case BSONType::NumberLong: {
            switch (r.type()) {
                case NumberLong:
                    return compareLongs(l._numberLong(), r._numberLong());
                case NumberInt:
                    return compareLongs(l._numberLong(), r._numberInt());
                case NumberDouble:
                    return compareLongToDouble(l._numberLong(), r._numberDouble());
                case NumberDecimal:
                    return compareLongToDecimal(l._numberLong(), r._numberDecimal());
                default:
                    MONGO_UNREACHABLE;
            }
        }
        case BSONType::NumberDouble: {
            switch (r.type()) {
                case NumberDouble:
                    return compareDoubles(l._numberDouble(), r._numberDouble());
                case NumberInt:
                    return compareDoubles(l._numberDouble(), r._numberInt());
                case NumberLong:
                    return compareDoubleToLong(l._numberDouble(), r._numberLong());
                case NumberDecimal:
                    return compareDoubleToDecimal(l._numberDouble(), r._numberDecimal());
                default:
                    MONGO_UNREACHABLE;
            }
        }
        case BSONType::NumberDecimal: {
            switch (r.type()) {
                case NumberDecimal:
                    return compareDecimals(l._numberDecimal(), r._numberDecimal());
                case NumberInt:
                    return compareDecimalToInt(l._numberDecimal(), r._numberInt());
                case NumberLong:
                    return compareDecimalToLong(l._numberDecimal(), r._numberLong());
                case NumberDouble:
                    return compareDecimalToDouble(l._numberDecimal(), r._numberDouble());
                default:
                    MONGO_UNREACHABLE;
            }
        }
        case BSONType::jstOID:
            // ObjectIds are fixed-width; a raw byte comparison gives the ordering.
            return memcmp(l.value(), r.value(), OID::kOIDSize);
        case BSONType::Code:
            return compareElementStringValues(l, r);
        case BSONType::Symbol:
        case BSONType::String: {
            if (comparator) {
                return comparator->compare(l.valueStringData(), r.valueStringData());
            } else {
                return compareElementStringValues(l, r);
            }
        }
        case BSONType::Object:
        case BSONType::Array: {
            // Recurse; field names of nested elements ARE significant here.
            return l.embeddedObject().woCompare(
                r.embeddedObject(),
                BSONObj(),
                rules | BSONElement::ComparisonRules::kConsiderFieldName,
                comparator);
        }
        case BSONType::DBRef: {
            // Shorter payload sorts first; equal-length payloads compare byte-wise.
            int lsz = l.valuesize();
            int rsz = r.valuesize();
            if (lsz - rsz != 0)
                return lsz - rsz;
            return memcmp(l.value(), r.value(), lsz);
        }
        case BSONType::BinData: {
            int lsz = l.objsize();  // our bin data size in bytes, not including the subtype byte
            int rsz = r.objsize();
            if (lsz - rsz != 0)
                return lsz - rsz;
            // Skip the 4-byte length prefix; include the subtype byte in the compare.
            return memcmp(l.value() + 4, r.value() + 4, lsz + 1 /*+1 for subtype byte*/);
        }
        case BSONType::RegEx: {
            // Compare pattern first, then flags.
            int c = strcmp(l.regex(), r.regex());
            if (c)
                return c;
            return strcmp(l.regexFlags(), r.regexFlags());
        }
        case BSONType::CodeWScope: {
            // Compare the code strings first (length excludes the NUL terminator).
            int cmp = StringData(l.codeWScopeCode(), l.codeWScopeCodeLen() - 1)
                          .compare(StringData(r.codeWScopeCode(), r.codeWScopeCodeLen() - 1));
            if (cmp)
                return cmp;

            // When comparing the scope object, we should consider field names. Special string
            // comparison semantics do not apply to strings nested inside the CodeWScope scope
            // object, so we do not pass through the string comparator.
            return l.codeWScopeObject().woCompare(
                r.codeWScopeObject(),
                BSONObj(),
                rules | BSONElement::ComparisonRules::kConsiderFieldName);
        }
    }

    MONGO_UNREACHABLE;
}
Status ReplSetHeartbeatResponse::initialize(const BSONObj& doc) { // Old versions set this even though they returned not "ok" _mismatch = doc[kMismatchFieldName].trueValue(); if (_mismatch) return Status(ErrorCodes::InconsistentReplicaSetNames, "replica set name doesn't match."); // Old versions sometimes set the replica set name ("set") but ok:0 const BSONElement replSetNameElement = doc[kReplSetFieldName]; if (replSetNameElement.eoo()) { _setName.clear(); } else if (replSetNameElement.type() != String) { return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" << kReplSetFieldName << "\" field in response to replSetHeartbeat to have " "type String, but found " << typeName(replSetNameElement.type())); } else { _setName = replSetNameElement.String(); } if (_setName.empty() && !doc[kOkFieldName].trueValue()) { std::string errMsg = doc[kErrMsgFieldName].str(); BSONElement errCodeElem = doc[kErrorCodeFieldName]; if (errCodeElem.ok()) { if (!errCodeElem.isNumber()) return Status(ErrorCodes::BadValue, "Error code is not a number!"); int errorCode = errCodeElem.numberInt(); return Status(ErrorCodes::Error(errorCode), errMsg); } return Status(ErrorCodes::UnknownError, errMsg); } const BSONElement hasDataElement = doc[kHasDataFieldName]; _hasDataSet = !hasDataElement.eoo(); _hasData = hasDataElement.trueValue(); const BSONElement electionTimeElement = doc[kElectionTimeFieldName]; if (electionTimeElement.eoo()) { _electionTimeSet = false; } else if (electionTimeElement.type() == bsonTimestamp) { _electionTimeSet = true; _electionTime = electionTimeElement.timestamp(); } else if (electionTimeElement.type() == Date) { _electionTimeSet = true; _electionTime = Timestamp(electionTimeElement.date()); } else { return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" << kElectionTimeFieldName << "\" field in response to replSetHeartbeat " "command to have type Date or Timestamp, but found type " << typeName(electionTimeElement.type())); } const 
BSONElement timeElement = doc[kTimeFieldName]; if (timeElement.eoo()) { _timeSet = false; } else if (timeElement.isNumber()) { _timeSet = true; _time = Seconds(timeElement.numberLong()); } else { return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" << kTimeFieldName << "\" field in response to replSetHeartbeat " "command to have a numeric type, but found type " << typeName(timeElement.type())); } const BSONElement opTimeElement = doc[kOpTimeFieldName]; if (opTimeElement.eoo()) { _opTimeSet = false; } else if (opTimeElement.type() == bsonTimestamp) { _opTimeSet = true; _opTime = opTimeElement.timestamp(); } else if (opTimeElement.type() == Date) { _opTimeSet = true; _opTime = Timestamp(opTimeElement.date()); } else { return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" << kOpTimeFieldName << "\" field in response to replSetHeartbeat " "command to have type Date or Timestamp, but found type " << typeName(opTimeElement.type())); } const BSONElement electableElement = doc[kIsElectableFieldName]; if (electableElement.eoo()) { _electableSet = false; } else { _electableSet = true; _electable = electableElement.trueValue(); } _isReplSet = doc[kIsReplSetFieldName].trueValue(); const BSONElement memberStateElement = doc[kMemberStateFieldName]; if (memberStateElement.eoo()) { _stateSet = false; } else if (memberStateElement.type() != NumberInt && memberStateElement.type() != NumberLong) { return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" << kMemberStateFieldName << "\" field in response to replSetHeartbeat " "command to have type NumberInt or NumberLong, but found type " << typeName(memberStateElement.type())); } else { long long stateInt = memberStateElement.numberLong(); if (stateInt < 0 || stateInt > MemberState::RS_MAX) { return Status(ErrorCodes::BadValue, str::stream() << "Value for \"" << kMemberStateFieldName << "\" in response to replSetHeartbeat is " "out of range; legal values are non-negative and no more 
than " << MemberState::RS_MAX); } _stateSet = true; _state = MemberState(static_cast<int>(stateInt)); } _stateDisagreement = doc[kHasStateDisagreementFieldName].trueValue(); // Not required for the case of uninitialized members -- they have no config const BSONElement versionElement = doc[kConfigVersionFieldName]; // If we have an optime then we must have a version if (_opTimeSet && versionElement.eoo()) { return Status(ErrorCodes::NoSuchKey, str::stream() << "Response to replSetHeartbeat missing required \"" << kConfigVersionFieldName << "\" field even though initialized"); } // If there is a "v" (config version) then it must be an int. if (!versionElement.eoo() && versionElement.type() != NumberInt) { return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" << kConfigVersionFieldName << "\" field in response to replSetHeartbeat to have " "type NumberInt, but found " << typeName(versionElement.type())); } _version = versionElement.numberInt(); const BSONElement hbMsgElement = doc[kHbMessageFieldName]; if (hbMsgElement.eoo()) { _hbmsg.clear(); } else if (hbMsgElement.type() != String) { return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" << kHbMessageFieldName << "\" field in response to replSetHeartbeat to have " "type String, but found " << typeName(hbMsgElement.type())); } else { _hbmsg = hbMsgElement.String(); } const BSONElement syncingToElement = doc[kSyncSourceFieldName]; if (syncingToElement.eoo()) { _syncingTo.clear(); } else if (syncingToElement.type() != String) { return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" << kSyncSourceFieldName << "\" field in response to replSetHeartbeat to " "have type String, but found " << typeName(syncingToElement.type())); } else { _syncingTo = syncingToElement.String(); } const BSONElement rsConfigElement = doc[kConfigFieldName]; if (rsConfigElement.eoo()) { _configSet = false; _config = ReplicaSetConfig(); return Status::OK(); } else if (rsConfigElement.type() != 
Object) { return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" << kConfigFieldName << "\" in response to replSetHeartbeat to have type " "Object, but found " << typeName(rsConfigElement.type())); } _configSet = true; return _config.initialize(rsConfigElement.Obj()); }