Exemple #1
0
 bool DBClientReplicaSet::call( Message &toSend, Message &response, bool assertOk , string * actualServer ) {
     if ( toSend.operation() == dbQuery ) {
         // TODO: might be possible to do this faster by changing api
         DbMessage dm( toSend );
         QueryMessage qm( dm );
         if ( qm.queryOptions & QueryOption_SlaveOk ) {
             for ( int i=0; i<3; i++ ) {
                 try {
                     DBClientConnection* s = checkSlave();
                     if ( actualServer )
                         *actualServer = s->getServerAddress();
                     return s->call( toSend , response , assertOk );
                 }
                 catch ( DBException &e ) {
                 	LOG(1) << "can't call replica set slave " << i << " : " << _slaveHost << causedBy( e ) << endl;
                     if ( actualServer )
                         *actualServer = "";
                 }
             }
         }
     }
     
     DBClientConnection* m = checkMaster();
     if ( actualServer )
         *actualServer = m->getServerAddress();
     return m->call( toSend , response , assertOk );
 }
Exemple #2
0
    void syncFixUp(OperationContext* txn,
                   FixUpInfo& fixUpInfo,
                   OplogReader* oplogreader,
                   ReplicationCoordinator* replCoord) {
        DBClientConnection* them = oplogreader->conn();

        // fetch all first so we needn't handle interruption in a fancy way

        unsigned long long totalSize = 0;

        list< pair<DocID, BSONObj> > goodVersions;

        BSONObj newMinValid;

        // fetch all the goodVersions of each document from current primary
        DocID doc;
        unsigned long long numFetched = 0;
        try {
            for (set<DocID>::iterator it = fixUpInfo.toRefetch.begin();
                    it != fixUpInfo.toRefetch.end();
                    it++) {
                doc = *it;

                verify(!doc._id.eoo());

                {
                    // TODO : slow.  lots of round trips.
                    numFetched++;
                    BSONObj good = them->findOne(doc.ns, doc._id.wrap(),
                                                     NULL, QueryOption_SlaveOk).getOwned();
                    totalSize += good.objsize();
                    uassert(13410, "replSet too much data to roll back",
                            totalSize < 300 * 1024 * 1024);

                    // note good might be eoo, indicating we should delete it
                    goodVersions.push_back(pair<DocID, BSONObj>(doc,good));
                }
            }
            newMinValid = oplogreader->getLastOp(rsOplogName);
            if (newMinValid.isEmpty()) {
                error() << "rollback error newMinValid empty?";
                return;
            }
        }
        catch (DBException& e) {
            LOG(1) << "rollback re-get objects: " << e.toString();
            error() << "rollback couldn't re-get ns:" << doc.ns << " _id:" << doc._id << ' '
                    << numFetched << '/' << fixUpInfo.toRefetch.size();
            throw e;
        }

        log() << "rollback 3.5";
        if (fixUpInfo.rbid != getRBID(oplogreader->conn())) {
            // our source rolled back itself.  so the data we received isn't necessarily consistent.
            warning() << "rollback rbid on source changed during rollback, cancelling this attempt";
            return;
        }

        // update them
        log() << "rollback 4 n:" << goodVersions.size();

        bool warn = false;

        invariant(!fixUpInfo.commonPointOurDiskloc.isNull());
        invariant(txn->lockState()->isW());

        // we have items we are writing that aren't from a point-in-time.  thus best not to come
        // online until we get to that point in freshness.
        Timestamp minValid = newMinValid["ts"].timestamp();
        log() << "minvalid=" << minValid.toStringLong();
        setMinValid(txn, minValid);

        // any full collection resyncs required?
        if (!fixUpInfo.collectionsToResyncData.empty()
                || !fixUpInfo.collectionsToResyncMetadata.empty()) {

            for (const string& ns : fixUpInfo.collectionsToResyncData) {
                log() << "rollback 4.1.1 coll resync " << ns;

                fixUpInfo.collectionsToResyncMetadata.erase(ns);

                const NamespaceString nss(ns);

                Database* db = dbHolder().openDb(txn, nss.db().toString());
                invariant(db);

                {
                    WriteUnitOfWork wunit(txn);
                    db->dropCollection(txn, ns);
                    wunit.commit();
                }

                {
                    string errmsg;

                    // This comes as a GlobalWrite lock, so there is no DB to be acquired after
                    // resume, so we can skip the DB stability checks. Also 
                    // copyCollectionFromRemote will acquire its own database pointer, under the
                    // appropriate locks, so just releasing and acquiring the lock is safe.
                    invariant(txn->lockState()->isW());
                    Lock::TempRelease release(txn->lockState());

                    bool ok = copyCollectionFromRemote(txn, them->getServerAddress(), ns, errmsg);
                    uassert(15909, str::stream() << "replSet rollback error resyncing collection "
                                                 << ns << ' ' << errmsg, ok);
                }
            }

            for (const string& ns : fixUpInfo.collectionsToResyncMetadata) {
                log() << "rollback 4.1.2 coll metadata resync " << ns;

                const NamespaceString nss(ns);
                auto db = dbHolder().openDb(txn, nss.db().toString());
                invariant(db);
                auto collection = db->getCollection(ns);
                invariant(collection);
                auto cce = collection->getCatalogEntry();

                const std::list<BSONObj> info =
                    them->getCollectionInfos(nss.db().toString(), BSON("name" << nss.coll()));

                if (info.empty()) {
                    // Collection dropped by "them" so we should drop it too.
                    log() << ns << " not found on remote host, dropping";
                    fixUpInfo.toDrop.insert(ns);
                    continue;
                }

                invariant(info.size() == 1);

                CollectionOptions options;
                auto status = options.parse(info.front());
                if (!status.isOK()) {
                    throw RSFatalException(str::stream() << "Failed to parse options "
                                                         << info.front() << ": "
                                                         << status.toString());
                }

                WriteUnitOfWork wuow(txn);
                if (options.flagsSet || cce->getCollectionOptions(txn).flagsSet) {
                    cce->updateFlags(txn, options.flags);
                }

                status = collection->setValidator(txn, options.validator);
                if (!status.isOK()) {
                    throw RSFatalException(str::stream() << "Failed to set validator: "
                                                         << status.toString());
                }
                wuow.commit();
            }

            // we did more reading from primary, so check it again for a rollback (which would mess
            // us up), and make minValid newer.
            log() << "rollback 4.2";

            string err;
            try {
                newMinValid = oplogreader->getLastOp(rsOplogName);
                if (newMinValid.isEmpty()) {
                    err = "can't get minvalid from sync source";
                }
                else {
                    Timestamp minValid = newMinValid["ts"].timestamp();
                    log() << "minvalid=" << minValid.toStringLong();
                    setMinValid(txn, minValid);
                }
            }
            catch (DBException& e) {
                err = "can't get/set minvalid: ";
                err += e.what();
            }
            if (fixUpInfo.rbid != getRBID(oplogreader->conn())) {
                // our source rolled back itself.  so the data we received isn't necessarily
                // consistent. however, we've now done writes.  thus we have a problem.
                err += "rbid at primary changed during resync/rollback";
            }
            if (!err.empty()) {
                severe() << "rolling back : " << err
                        << ". A full resync will be necessary.";
                // TODO: reset minvalid so that we are permanently in fatal state
                // TODO: don't be fatal, but rather, get all the data first.
                throw RSFatalException();
            }
            log() << "rollback 4.3";
        }

        map<string,shared_ptr<Helpers::RemoveSaver> > removeSavers;

        log() << "rollback 4.6";
        // drop collections to drop before doing individual fixups - that might make things faster
        // below actually if there were subsequent inserts to rollback
        for (set<string>::iterator it = fixUpInfo.toDrop.begin();
                it != fixUpInfo.toDrop.end();
                it++) {
            log() << "rollback drop: " << *it;

            Database* db = dbHolder().get(txn, nsToDatabaseSubstring(*it));
            if (db) {
                WriteUnitOfWork wunit(txn);

                shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[*it];
                if (!removeSaver)
                    removeSaver.reset(new Helpers::RemoveSaver("rollback", "", *it));

                // perform a collection scan and write all documents in the collection to disk
                boost::scoped_ptr<PlanExecutor> exec(
                        InternalPlanner::collectionScan(txn,
                                                        *it,
                                                        db->getCollection(*it)));
                BSONObj curObj;
                PlanExecutor::ExecState execState;
                while (PlanExecutor::ADVANCED == (execState = exec->getNext(&curObj, NULL))) {
                    removeSaver->goingToDelete(curObj);
                }
                if (execState != PlanExecutor::IS_EOF) {
                    if (execState == PlanExecutor::FAILURE &&
                            WorkingSetCommon::isValidStatusMemberObject(curObj)) {
                        Status errorStatus = WorkingSetCommon::getMemberObjectStatus(curObj);
                        severe() << "rolling back createCollection on " << *it
                                 << " failed with " << errorStatus
                                 << ". A full resync is necessary.";
                    }
                    else {
                        severe() << "rolling back createCollection on " << *it
                                 << " failed. A full resync is necessary.";
                    }
                            
                    throw RSFatalException();
                }

                db->dropCollection(txn, *it);
                wunit.commit();
            }
        }

        log() << "rollback 4.7";
        OldClientContext ctx(txn, rsOplogName);
        Collection* oplogCollection = ctx.db()->getCollection(rsOplogName);
        uassert(13423,
                str::stream() << "replSet error in rollback can't find " << rsOplogName,
                oplogCollection);

        unsigned deletes = 0, updates = 0;
        time_t lastProgressUpdate = time(0);
        time_t progressUpdateGap = 10;
        for (list<pair<DocID, BSONObj> >::iterator it = goodVersions.begin();
                it != goodVersions.end();
                it++) {
            time_t now = time(0);
            if (now - lastProgressUpdate > progressUpdateGap) {
                log() << deletes << " delete and "
                      << updates << " update operations processed out of "
                      << goodVersions.size() << " total operations";
                lastProgressUpdate = now;
            }
            const DocID& doc = it->first;
            BSONObj pattern = doc._id.wrap(); // { _id : ... }
            try {
                verify(doc.ns && *doc.ns);
                if (fixUpInfo.collectionsToResyncData.count(doc.ns)) {
                    // we just synced this entire collection
                    continue;
                }

                // keep an archive of items rolled back
                shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[doc.ns];
                if (!removeSaver)
                    removeSaver.reset(new Helpers::RemoveSaver("rollback", "", doc.ns));

                // todo: lots of overhead in context, this can be faster
                OldClientContext ctx(txn, doc.ns);

                // Add the doc to our rollback file
                BSONObj obj;
                Collection* collection = ctx.db()->getCollection(doc.ns);

                // Do not log an error when undoing an insert on a no longer existent collection.
                // It is likely that the collection was dropped as part of rolling back a
                // createCollection command and regardless, the document no longer exists.
                if (collection) {
                    bool found = Helpers::findOne(txn, collection, pattern, obj, false);
                    if (found) {
                        removeSaver->goingToDelete(obj);
                    }
                    else {
                        error() << "rollback cannot find object: " << pattern
                                << " in namespace " << doc.ns;
                    }
                }

                if (it->second.isEmpty()) {
                    // wasn't on the primary; delete.
                    // TODO 1.6 : can't delete from a capped collection.  need to handle that here.
                    deletes++;

                    if (collection) {
                        if (collection->isCapped()) {
                            // can't delete from a capped collection - so we truncate instead. if
                            // this item must go, so must all successors!!!
                            try {
                                // TODO: IIRC cappedTruncateAfter does not handle completely empty.
                                // this will crazy slow if no _id index.
                                long long start = Listener::getElapsedTimeMillis();
                                RecordId loc = Helpers::findOne(txn, collection, pattern, false);
                                if (Listener::getElapsedTimeMillis() - start > 200)
                                    warning() << "roll back slow no _id index for "
                                          << doc.ns << " perhaps?";
                                // would be faster but requires index:
                                // RecordId loc = Helpers::findById(nsd, pattern);
                                if (!loc.isNull()) {
                                    try {
                                        collection->temp_cappedTruncateAfter(txn, loc, true);
                                    }
                                    catch (DBException& e) {
                                        if (e.getCode() == 13415) {
                                            // hack: need to just make cappedTruncate do this...
                                            MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                                                WriteUnitOfWork wunit(txn);
                                                uassertStatusOK(collection->truncate(txn));
                                                wunit.commit();
                                            } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
                                                                            txn,
                                                                            "truncate",
                                                                            collection->ns().ns());
                                        }
                                        else {
                                            throw e;
                                        }
                                    }
                                }
                            }
                            catch (DBException& e) {
                                error() << "rolling back capped collection rec "
                                      << doc.ns << ' ' << e.toString();
                            }
                        }
                        else {
Exemple #3
0
    void ReplSetImpl::syncFixUp(HowToFixUp& h, OplogReader& r) {
        DBClientConnection *them = r.conn();

        // fetch all first so we needn't handle interruption in a fancy way

        unsigned long long totSize = 0;

        list< pair<DocID,bo> > goodVersions;

        bo newMinValid;

        /* fetch all the goodVersions of each document from current primary */
        DocID d;
        unsigned long long n = 0;
        try {
            for( set<DocID>::iterator i = h.toRefetch.begin(); i != h.toRefetch.end(); i++ ) {
                d = *i;

                assert( !d._id.eoo() );

                {
                    /* TODO : slow.  lots of round trips. */
                    n++;
                    bo good= them->findOne(d.ns, d._id.wrap()).getOwned();
                    totSize += good.objsize();
                    uassert( 13410, "replSet too much data to roll back", totSize < 300 * 1024 * 1024 );

                    // note good might be eoo, indicating we should delete it
                    goodVersions.push_back(pair<DocID,bo>(d,good));
                }
            }
            newMinValid = r.getLastOp(rsoplog);
            if( newMinValid.isEmpty() ) {
                sethbmsg("rollback error newMinValid empty?");
                return;
            }
        }
        catch(DBException& e) {
            sethbmsg(str::stream() << "rollback re-get objects: " << e.toString(),0);
            log() << "rollback couldn't re-get ns:" << d.ns << " _id:" << d._id << ' ' << n << '/' << h.toRefetch.size() << rsLog;
            throw e;
        }

        MemoryMappedFile::flushAll(true);

        sethbmsg("rollback 3.5");
        if( h.rbid != getRBID(r.conn()) ) {
            // our source rolled back itself.  so the data we received isn't necessarily consistent.
            sethbmsg("rollback rbid on source changed during rollback, cancelling this attempt");
            return;
        }

        // update them
        sethbmsg(str::stream() << "rollback 4 n:" << goodVersions.size());

        bool warn = false;

        assert( !h.commonPointOurDiskloc.isNull() );

        dbMutex.assertWriteLocked();

        /* we have items we are writing that aren't from a point-in-time.  thus best not to come online
           until we get to that point in freshness. */
        setMinValid(newMinValid);

        /** any full collection resyncs required? */
        if( !h.collectionsToResync.empty() ) {
            for( set<string>::iterator i = h.collectionsToResync.begin(); i != h.collectionsToResync.end(); i++ ) {
                string ns = *i;
                sethbmsg(str::stream() << "rollback 4.1 coll resync " << ns);
                Client::Context c(*i, dbpath, 0, /*doauth*/false);
                try {
                    bob res;
                    string errmsg;
                    dropCollection(ns, errmsg, res);
                    {
                        dbtemprelease r;
                        bool ok = copyCollectionFromRemote(them->getServerAddress(), ns, bo(), errmsg, false);
                        if( !ok ) {
                            log() << "replSet rollback error resyncing collection " << ns << ' ' << errmsg << rsLog;
                            throw "rollback error resyncing rollection [1]";
                        }
                    }
                }
                catch(...) {
                    log() << "replset rollback error resyncing collection " << ns << rsLog;
                    throw "rollback error resyncing rollection [2]";
                }
            }

            /* we did more reading from primary, so check it again for a rollback (which would mess us up), and
               make minValid newer.
               */
            sethbmsg("rollback 4.2");
            {
                string err;
                try {
                    newMinValid = r.getLastOp(rsoplog);
                    if( newMinValid.isEmpty() ) {
                        err = "can't get minvalid from primary";
                    }
                    else {
                        setMinValid(newMinValid);
                    }
                }
                catch(...) {
                    err = "can't get/set minvalid";
                }
                if( h.rbid != getRBID(r.conn()) ) {
                    // our source rolled back itself.  so the data we received isn't necessarily consistent.
                    // however, we've now done writes.  thus we have a problem.
                    err += "rbid at primary changed during resync/rollback";
                }
                if( !err.empty() ) {
                    log() << "replSet error rolling back : " << err << ". A full resync will be necessary." << rsLog;
                    /* todo: reset minvalid so that we are permanently in fatal state */
                    /* todo: don't be fatal, but rather, get all the data first. */
                    sethbmsg("rollback error");
                    throw rsfatal();
                }
            }
            sethbmsg("rollback 4.3");
        }

        sethbmsg("rollback 4.6");
        /** drop collections to drop before doing individual fixups - that might make things faster below actually if there were subsequent inserts to rollback */
        for( set<string>::iterator i = h.toDrop.begin(); i != h.toDrop.end(); i++ ) {
            Client::Context c(*i, dbpath, 0, /*doauth*/false);
            try {
                bob res;
                string errmsg;
                log(1) << "replSet rollback drop: " << *i << rsLog;
                dropCollection(*i, errmsg, res);
            }
            catch(...) {
                log() << "replset rollback error dropping collection " << *i << rsLog;
            }
        }

        sethbmsg("rollback 4.7");
        Client::Context c(rsoplog, dbpath, 0, /*doauth*/false);
        NamespaceDetails *oplogDetails = nsdetails(rsoplog);
        uassert(13423, str::stream() << "replSet error in rollback can't find " << rsoplog, oplogDetails);

        map<string,shared_ptr<RemoveSaver> > removeSavers;

        unsigned deletes = 0, updates = 0;
        for( list<pair<DocID,bo> >::iterator i = goodVersions.begin(); i != goodVersions.end(); i++ ) {
            const DocID& d = i->first;
            bo pattern = d._id.wrap(); // { _id : ... }
            try {
                assert( d.ns && *d.ns );
                if( h.collectionsToResync.count(d.ns) ) {
                    /* we just synced this entire collection */
                    continue;
                }

                /* keep an archive of items rolled back */
                shared_ptr<RemoveSaver>& rs = removeSavers[d.ns];
                if ( ! rs )
                    rs.reset( new RemoveSaver( "rollback" , "" , d.ns ) );

                // todo: lots of overhead in context, this can be faster
                Client::Context c(d.ns, dbpath, 0, /*doauth*/false);
                if( i->second.isEmpty() ) {
                    // wasn't on the primary; delete.
                    /* TODO1.6 : can't delete from a capped collection.  need to handle that here. */
                    deletes++;

                    NamespaceDetails *nsd = nsdetails(d.ns);
                    if( nsd ) {
                        if( nsd->capped ) {
                            /* can't delete from a capped collection - so we truncate instead. if this item must go,
                            so must all successors!!! */
                            try {
                                /** todo: IIRC cappedTrunateAfter does not handle completely empty.  todo. */
                                // this will crazy slow if no _id index.
                                long long start = Listener::getElapsedTimeMillis();
                                DiskLoc loc = Helpers::findOne(d.ns, pattern, false);
                                if( Listener::getElapsedTimeMillis() - start > 200 )
                                    log() << "replSet warning roll back slow no _id index for " << d.ns << " perhaps?" << rsLog;
                                //would be faster but requires index: DiskLoc loc = Helpers::findById(nsd, pattern);
                                if( !loc.isNull() ) {
                                    try {
                                        nsd->cappedTruncateAfter(d.ns, loc, true);
                                    }
                                    catch(DBException& e) {
                                        if( e.getCode() == 13415 ) {
                                            // hack: need to just make cappedTruncate do this...
                                            nsd->emptyCappedCollection(d.ns);
                                        }
                                        else {
                                            throw;
                                        }
                                    }
                                }
                            }
                            catch(DBException& e) {
                                log() << "replSet error rolling back capped collection rec " << d.ns << ' ' << e.toString() << rsLog;
                            }
                        }
                        else {
                            try {
                                deletes++;
                                deleteObjects(d.ns, pattern, /*justone*/true, /*logop*/false, /*god*/true, rs.get() );
                            }
                            catch(...) {
                                log() << "replSet error rollback delete failed ns:" << d.ns << rsLog;
                            }
                        }
                        // did we just empty the collection?  if so let's check if it even exists on the source.
                        if( nsd->stats.nrecords == 0 ) {
                            try {
                                string sys = cc().database()->name + ".system.namespaces";
                                bo o = them->findOne(sys, QUERY("name"<<d.ns));
                                if( o.isEmpty() ) {
                                    // we should drop
                                    try {
                                        bob res;
                                        string errmsg;
                                        dropCollection(d.ns, errmsg, res);
                                    }
                                    catch(...) {
                                        log() << "replset error rolling back collection " << d.ns << rsLog;
                                    }
                                }
                            }
                            catch(DBException& ) {
                                /* this isn't *that* big a deal, but is bad. */
                                log() << "replSet warning rollback error querying for existence of " << d.ns << " at the primary, ignoring" << rsLog;
                            }
                        }
                    }
                }
                else {
                    // todo faster...
                    OpDebug debug;
                    updates++;
                    _updateObjects(/*god*/true, d.ns, i->second, pattern, /*upsert=*/true, /*multi=*/false , /*logtheop=*/false , debug, rs.get() );
                }
            }
            catch(DBException& e) {
                log() << "replSet exception in rollback ns:" << d.ns << ' ' << pattern.toString() << ' ' << e.toString() << " ndeletes:" << deletes << rsLog;
                warn = true;
            }
        }

        removeSavers.clear(); // this effectively closes all of them

        sethbmsg(str::stream() << "rollback 5 d:" << deletes << " u:" << updates);
        MemoryMappedFile::flushAll(true);
        sethbmsg("rollback 6");

        // clean up oplog
        log(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog;
        // todo: fatal error if this throws?
        oplogDetails->cappedTruncateAfter(rsoplog, h.commonPointOurDiskloc, false);

        /* reset cached lastoptimewritten and h value */
        loadLastOpTimeWritten();

        sethbmsg("rollback 7");
        MemoryMappedFile::flushAll(true);

        // done
        if( warn )
            sethbmsg("issues during syncRollback, see log");
        else
            sethbmsg("rollback done");
    }
    bool DBClientReplicaSet::call(Message &toSend,
                                  Message &response,
                                  bool assertOk,
                                  string * actualServer) {
        const char * ns = 0;

        if (toSend.operation() == dbQuery) {
            // TODO: might be possible to do this faster by changing api
            DbMessage dm(toSend);
            QueryMessage qm(dm);
            ns = qm.ns;

            shared_ptr<ReadPreferenceSetting> readPref( _extractReadPref( qm.query,
                                                                          qm.queryOptions ) );
            if ( _isSecondaryQuery( ns, qm.query, *readPref ) ) {

                LOG( 3 ) << "dbclient_rs call using secondary or tagged node selection in "
                                    << _getMonitor()->getName() << ", read pref is "
                                    << readPref->toBSON() << " (primary : "
                                    << ( _master.get() != NULL ?
                                            _master->getServerAddress() : "[not cached]" )
                                    << ", lastTagged : "
                                    << ( _lastSlaveOkConn.get() != NULL ?
                                            _lastSlaveOkConn->getServerAddress() : "[not cached]" )
                                    << ")" << endl;

                for (size_t retry = 0; retry < MAX_RETRY; retry++) {
                    try {
                        DBClientConnection* conn = selectNodeUsingTags(readPref);

                        if (conn == NULL) {
                            return false;
                        }

                        if (actualServer != NULL) {
                            *actualServer = conn->getServerAddress();
                        }

                        return conn->call(toSend, response, assertOk);
                    }
                    catch ( const DBException& dbExcep ) {
                        LOG(1) << "can't call replica set node " << _lastSlaveOkHost << ": "
                                          << causedBy( dbExcep ) << endl;

                        if ( actualServer ) *actualServer = "";

                        invalidateLastSlaveOkCache();
                    }
                }

                // Was not able to successfully send after max retries
                return false;
            }
        }
        
        LOG( 3 ) << "dbclient_rs call to primary node in " << _getMonitor()->getName() << endl;

        DBClientConnection* m = checkMaster();
        if ( actualServer )
            *actualServer = m->getServerAddress();
        
        if ( ! m->call( toSend , response , assertOk ) )
            return false;

        if ( ns ) {
            QueryResult * res = (QueryResult*)response.singleData();
            if ( res->nReturned == 1 ) {
                BSONObj x(res->data() );
                if ( str::contains( ns , "$cmd" ) ) {
                    if ( isNotMasterErrorString( x["errmsg"] ) )
                        isntMaster();
                }
                else {
                    if ( isNotMasterErrorString( getErrField( x ) ) )
                        isntMaster();
                }
            }
        }

        return true;
    }
    void DBClientReplicaSet::say(Message& toSend, bool isRetry, string* actualServer) {

        if (!isRetry)
            _lazyState = LazyState();

        const int lastOp = toSend.operation();

        if (lastOp == dbQuery) {
            // TODO: might be possible to do this faster by changing api
            DbMessage dm(toSend);
            QueryMessage qm(dm);

            shared_ptr<ReadPreferenceSetting> readPref( _extractReadPref( qm.query,
                                                                          qm.queryOptions ) );
            if ( _isSecondaryQuery( qm.ns, qm.query, *readPref ) ) {

                LOG( 3 ) << "dbclient_rs say using secondary or tagged node selection in "
                                    << _getMonitor()->getName() << ", read pref is "
                                    << readPref->toBSON() << " (primary : "
                                    << ( _master.get() != NULL ?
                                            _master->getServerAddress() : "[not cached]" )
                                    << ", lastTagged : "
                                    << ( _lastSlaveOkConn.get() != NULL ?
                                            _lastSlaveOkConn->getServerAddress() : "[not cached]" )
                                    << ")" << endl;

                string lastNodeErrMsg;

                for (size_t retry = 0; retry < MAX_RETRY; retry++) {
                    _lazyState._retries = retry;
                    try {
                        DBClientConnection* conn = selectNodeUsingTags(readPref);

                        if (conn == NULL) {
                            break;
                        }

                        if (actualServer != NULL) {
                            *actualServer = conn->getServerAddress();
                        }

                        conn->say(toSend);

                        _lazyState._lastOp = lastOp;
                        _lazyState._secondaryQueryOk = true;
                        _lazyState._lastClient = conn;
                    }
                    catch ( const DBException& DBExcep ) {
                        StringBuilder errMsgBuilder;
                        errMsgBuilder << "can't callLazy replica set node "
                                   << _lastSlaveOkHost.toString() << ": " << causedBy( DBExcep );
                        lastNodeErrMsg = errMsgBuilder.str();

                        LOG(1) << lastNodeErrMsg << endl;
                        invalidateLastSlaveOkCache();
                        continue;
                    }

                    return;
                }

                StringBuilder assertMsg;
                assertMsg << "Failed to call say, no good nodes in " << _getMonitor()->getName();
                if ( !lastNodeErrMsg.empty() ) {
                    assertMsg << ", last error: " << lastNodeErrMsg;
                }

                uasserted(16380, assertMsg.str());
            }
        }

        LOG( 3 ) << "dbclient_rs say to primary node in " << _getMonitor()->getName()
                            << endl;

        DBClientConnection* master = checkMaster();
        if (actualServer)
            *actualServer = master->getServerAddress();

        _lazyState._lastOp = lastOp;
        _lazyState._secondaryQueryOk = false;
        // Don't retry requests to primary since there is only one host to try
        _lazyState._retries = MAX_RETRY;
        _lazyState._lastClient = master;

        master->say(toSend);
        return;
    }