Example #1
    bool Sync::shouldRetry(const BSONObj& o) {
        // should already have write lock
        const char *ns = o.getStringField("ns");
        Client::Context ctx(ns);
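        // OperationContextImpl supplies the per-operation storage-engine state
        // (recovery unit, lock state) that the insertDocument() call below uses.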
        OperationContextImpl txn;

        // we don't have the object yet, which is possible on initial sync.  get it.
        log() << "replication info adding missing object" << endl; // rare enough we can log

        BSONObj missingObj = getMissingDoc(ctx.db(), o);

        if( missingObj.isEmpty() ) {
            log() << "replication missing object not found on source. presumably deleted later in oplog" << endl;
            log() << "replication o2: " << o.getObjectField("o2").toString() << endl;
            log() << "replication o firstfield: " << o.getObjectField("o").firstElementFieldName() << endl;

            return false;
        }
        else {
            Collection* collection = ctx.db()->getOrCreateCollection( ns );
            verify( collection ); // should never happen
            StatusWith<DiskLoc> result = collection->insertDocument( &txn, missingObj, true );
            uassert(15917,
                    str::stream() << "failed to insert missing doc: " << result.toString(),
                    result.isOK() );

            LOG(1) << "replication inserted missing doc: " << missingObj.toString() << endl;
            return true;
        }
    }
Example #2
File: sync.cpp Project: wjin/mongo
    bool Sync::shouldRetry(OperationContext* txn, const BSONObj& o) {
        const NamespaceString nss(o.getStringField("ns"));

        // Take an X lock on the database in order to preclude other modifications. Also, the
        // database might not exist yet, so create it.
        AutoGetOrCreateDb autoDb(txn, nss.db(), MODE_X);
        Database* const db = autoDb.getDb();

        // we don't have the object yet, which is possible on initial sync.  get it.
        log() << "replication info adding missing object" << endl; // rare enough we can log

        BSONObj missingObj = getMissingDoc(txn, db, o);

        if( missingObj.isEmpty() ) {
            log() << "replication missing object not found on source. presumably deleted later in oplog" << endl;
            log() << "replication o2: " << o.getObjectField("o2").toString() << endl;
            log() << "replication o firstfield: " << o.getObjectField("o").firstElementFieldName() << endl;

            return false;
        }
        else {
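            // Group the insert into a single WriteUnitOfWork so it either commits
            // atomically or is rolled back if an exception is thrown before commit().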
            WriteUnitOfWork wunit(txn);

            Collection* const collection = db->getOrCreateCollection(txn, nss.toString());
            invariant(collection);

            StatusWith<RecordId> result = collection->insertDocument(txn, missingObj, true);
            uassert(15917,
                    str::stream() << "failed to insert missing doc: " << result.toString(),
                    result.isOK() );

            LOG(1) << "replication inserted missing doc: " << missingObj.toString() << endl;

            wunit.commit();
            return true;
        }
    }
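A minimal caller sketch (not part of the original file): shouldRetry() is typically consulted after an oplog update fails to find its target document, and it returns true once the missing document has been fetched and inserted, signalling that the operation can be applied again. The applyOperation() helper and the wrapper name below are assumptions for illustration.

    // Hypothetical retry wrapper around applying a single oplog entry.
    bool applyOplogEntryWithFetch(OperationContext* txn, Sync& syncer, const BSONObj& op) {
        if (applyOperation(txn, op)) {          // assumed helper that applies one op
            return true;
        }
        // The update may have failed because the target document is missing on this
        // node (possible during initial sync); fetch it from the source and retry once.
        if (syncer.shouldRetry(txn, op)) {
            return applyOperation(txn, op);
        }
        return false;
    }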
Example #3
    void Balancer::run() {

        // This is the body of a BackgroundJob, so throwing here would end the balancer thread prematurely.
        while ( ! inShutdown() ) {

            if ( ! _init() ) {
                log() << "will retry to initialize balancer in one minute" << endl;
                sleepsecs( 60 );
                continue;
            }

            break;
        }

        int sleepTime = 10;

        // getConnectionString and the dist lock constructor do not throw, which is what we
        // expect while on the balancer thread
        ConnectionString config = configServer.getConnectionString();
        DistributedLock balanceLock( config , "balancer" );

        while ( ! inShutdown() ) {

            try {

                ScopedDbConnection conn(config.toString(), 30);

                // ping has to be first so we keep things in the config server in sync
                _ping();

                // use fresh shard state
                Shard::reloadShardInfo();

                // refresh chunk size (even though another balancer might be active)
                Chunk::refreshChunkSize();

                SettingsType balancerConfig;
                string errMsg;

                if (!grid.getBalancerSettings(&balancerConfig, &errMsg)) {
                    warning() << errMsg;
                    return;
                }

                // now make sure we should even be running
                if ((balancerConfig.isKeySet() && // balancer config doc exists
                        !grid.shouldBalance(balancerConfig)) ||
                        MONGO_FAIL_POINT(skipBalanceRound)) {

                    LOG(1) << "skipping balancing round because balancing is disabled" << endl;

                    // Ping again so scripts can determine if we're active without waiting
                    _ping( true );

                    conn.done();

                    sleepsecs( sleepTime );
                    continue;
                }

                uassert( 13258 , "oids broken after resetting!" , _checkOIDs() );

                {
                    dist_lock_try lk( &balanceLock , "doing balance round" );
                    if ( ! lk.got() ) {
                        LOG(1) << "skipping balancing round because another balancer is active" << endl;

                        // Ping again so scripts can determine if we're active without waiting
                        _ping( true );

                        conn.done();
                        
                        sleepsecs( sleepTime ); // no need to wake up soon
                        continue;
                    }

                    if ( !isConfigServerConsistent() ) {
                        conn.done();
                        warning() << "Skipping balancing round because data inconsistency"
                                  << " was detected amongst the config servers." << endl;
                        sleepsecs( sleepTime );
                        continue;
                    }

                    const bool waitForDelete = (balancerConfig.isWaitForDeleteSet() ?
                            balancerConfig.getWaitForDelete() : false);

                    scoped_ptr<WriteConcernOptions> writeConcern;
                    if (balancerConfig.isKeySet()) { // if balancer doc exists.
                        StatusWith<WriteConcernOptions*> extractStatus =
                                balancerConfig.extractWriteConcern();
                        if (extractStatus.isOK()) {
                            writeConcern.reset(extractStatus.getValue());
                        }
                        else {
                            warning() << extractStatus.toString();
                        }
                    }

                    LOG(1) << "*** start balancing round. "
                           << "waitForDelete: " << waitForDelete
                           << ", secondaryThrottle: "
                           << (writeConcern.get() ? writeConcern->toBSON().toString() : "default")
                           << endl;

                    vector<CandidateChunkPtr> candidateChunks;
                    _doBalanceRound( conn.conn() , &candidateChunks );
                    if ( candidateChunks.size() == 0 ) {
                        LOG(1) << "no need to move any chunk" << endl;
                        _balancedLastTime = 0;
                    }
                    else {
                        _balancedLastTime = _moveChunks(&candidateChunks,
                                                        writeConcern.get(),
                                                        waitForDelete );
                    }

                    LOG(1) << "*** end of balancing round" << endl;
                }

                // Ping again so scripts can determine if we're active without waiting
                _ping( true );
                
                conn.done();

                sleepsecs( _balancedLastTime ? sleepTime / 10 : sleepTime );
            }
            catch ( std::exception& e ) {
                log() << "caught exception while doing balance: " << e.what() << endl;

                // Log this to match the "*** start balancing round" message at log level 1
                LOG(1) << "*** End of balancing round" << endl;

                sleepsecs( sleepTime ); // sleep a fair amount b/c of error
                continue;
            }
        }

    }
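For context, here is a minimal sketch (assumed, not from the original file) of how run() above is hooked up: Balancer derives from mongo::BackgroundJob, whose go() spawns the thread that invokes run(). The startup helper name below is hypothetical.

    class Balancer : public BackgroundJob {
    public:
        virtual std::string name() const { return "Balancer"; }
        virtual void run();   // the loop shown above
    };

    static Balancer balancer;

    // Hypothetical startup hook: BackgroundJob::go() launches the thread that calls run().
    void startBalancer() {
        balancer.go();
    }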
Example #4
        void operator()( DBClientCursorBatchIterator &i ) {
            Lock::GlobalWrite lk;
            context.relocked();

            bool createdCollection = false;
            Collection* collection = NULL;

            while( i.moreInCurrentBatch() ) {
                if ( numSeen % 128 == 127 /*yield some*/ ) {
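                    // Clear the cached Collection pointer: the yield below may let other
                    // operations run and invalidate it, so it is re-fetched afterwards.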
                    collection = NULL;
                    time_t now = time(0);
                    if( now - lastLog >= 60 ) {
                        // report progress
                        if( lastLog )
                            log() << "clone " << to_collection << ' ' << numSeen << endl;
                        lastLog = now;
                    }
                    mayInterrupt( _mayBeInterrupted );
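                    // When _mayYield is set, temporarily release the global write lock until
                    // the end of this block so other operations get a turn.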
                    dbtempreleaseif t( _mayYield );
                }

                if ( isindex == false && collection == NULL ) {
                    collection = context.db()->getCollection( to_collection );
                    if ( !collection ) {
                        massert( 17321,
                                 str::stream()
                                 << "collection dropped during clone ["
                                 << to_collection << "]",
                                 !createdCollection );
                        createdCollection = true;
                        collection = context.db()->createCollection( txn, to_collection );
                        verify( collection );
                    }
                }

                BSONObj tmp = i.nextSafe();

                /* Ensure the object is valid. Note this will slow us down a little. */
                const Status status = validateBSON(tmp.objdata(), tmp.objsize());
                if (!status.isOK()) {
                    out() << "Cloner: skipping corrupt object from " << from_collection
                          << ": " << status.reason();
                    continue;
                }

                ++numSeen;

                BSONObj js = tmp;
                if ( isindex ) {
                    verify(nsToCollectionSubstring(from_collection) == "system.indexes");
                    js = fixindex(context.db()->name(), tmp);
                    indexesToBuild->push_back( js.getOwned() );
                    continue;
                }

                verify(nsToCollectionSubstring(from_collection) != "system.indexes");

                StatusWith<DiskLoc> loc = collection->insertDocument( txn, js, true );
                if ( !loc.isOK() ) {
                    error() << "error: exception cloning object in " << from_collection
                            << ' ' << loc.toString() << " obj:" << js;
                }
                uassertStatusOK( loc.getStatus() );
                if ( logForRepl )
                    logOp(txn, "i", to_collection, js);

                getDur().commitIfNeeded();

                RARELY if ( time( 0 ) - saveLast > 60 ) {
                    log() << numSeen << " objects cloned so far from collection " << from_collection;
                    saveLast = time( 0 );
                }
            }
        }
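A sketch of how such a batch functor is typically driven (assumed, not from the original file): the query() overload that takes a callback invokes it once per batch received from the remote node. The handler variable, namespace, and query options below are illustrative only.

        // 'handler' is an instance of the functor type shown above.
        DBClientConnection conn;
        // ... connect to the source node, set up cloner state ...
        conn.query(boost::function<void(DBClientCursorBatchIterator&)>(handler),
                   from_collection,          // namespace being cloned
                   Query(),                  // clone every document
                   0,                        // return all fields
                   QueryOption_NoCursorTimeout);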