Example #1
    /* grab initial copy of a database from the master */
    void ReplSource::resync(const std::string& dbName) {
        const std::string db(dbName);   // need local copy of the name, we're dropping the original
        string dummyNs = resyncDrop( db.c_str(), "internal" );
        Client::Context ctx( dummyNs );
        {
            log() << "resync: cloning database " << db << " to get an initial copy" << endl;
            ReplInfo r("resync: cloning a database");
            string errmsg;
            int errCode = 0;
            bool ok = Cloner::cloneFrom(hostName.c_str(), errmsg, cc().database()->name(), false,
                                        /*slaveOk*/ true, /*replauth*/ true, /*snapshot*/false,
                                        /*mayYield*/true, /*mayBeInterrupted*/false, &errCode);
            if ( !ok ) {
                if ( errCode == DatabaseDifferCaseCode ) {
                    resyncDrop( db.c_str(), "internal" );
                    log() << "resync: database " << db << " not valid on the master due to a name conflict, dropping." << endl;
                    return;
                }
                else {
                    problem() << "resync of " << db << " from " << hostName << " failed " << errmsg << endl;
                    throw SyncException();
                }
            }
        }

        log() << "resync: done with initial clone for db: " << db << endl;

        return;
    }
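For context, here is a minimal sketch of how a caller might guard such a resync attempt; tryResync() is an illustrative name and not code from the project, the point is only that the SyncException thrown above is expected to propagate to the caller.

    void tryResync(ReplSource& source, const std::string& dbName) {
        try {
            source.resync(dbName);
        }
        catch ( SyncException& ) {
            // the initial clone failed; log it and let the next sync pass retry
            log() << "resync of " << dbName << " failed, will retry on next pass" << endl;
        }
    }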
Example #2
FixedBuffer::FixedBuffer(size_t capacity, size_t osize) :
Buffer(capacity)
{
	objsize = osize;
	buf = new char[capacity * objsize];

#ifdef	CCXX_EXCEPTIONS
	if(!buf && Thread::getException() == Thread::throwObject)
		throw(this);
#ifdef	COMMON_STD_EXCEPTION
	else if(!buf && Thread::getException() == Thread::throwException)
		throw(SyncException("fixed buffer failure"));
#endif
#endif

	head = tail = buf;
}
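A minimal usage sketch for the constructor above, assuming the usual Common C++ convention that Thread::setException(Thread::throwException) selects exception-based error reporting; the sizes used and the message printed are illustrative only.

	Thread::setException(Thread::throwException);
	try {
		FixedBuffer fb(16, sizeof(int));	// room for 16 objects of sizeof(int) bytes each
		// ... use the buffer ...
	}
	catch(SyncException &) {
		// thrown by the constructor above when the backing allocation fails
		cerr << "fixed buffer allocation failed" << endl;
	}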
Example #3
File: repl.cpp Project: CoolCloud/mongo
    /* grab initial copy of a database from the master */
    bool ReplSource::resync(string db) {
        string dummyNs = resyncDrop( db.c_str(), "internal" );
        Client::Context ctx( dummyNs );
        {
            log() << "resync: cloning database " << db << " to get an initial copy" << endl;
            ReplInfo r("resync: cloning a database");
            string errmsg;
            bool ok = cloneFrom(hostName.c_str(), errmsg, cc().database()->name, false, /*slaveok*/ true, /*replauth*/ true, /*snapshot*/false, /*mayYield*/true, /*mayBeInterrupted*/false);
            if ( !ok ) {
                problem() << "resync of " << db << " from " << hostName << " failed " << errmsg << endl;
                throw SyncException();
            }
        }

        log() << "resync: done with initial clone for db: " << db << endl;

        return true;
    }
Example #4
File: repl.cpp Project: CoolCloud/mongo
    /* slave: pull some data from the master's oplog
       note: not yet in db mutex at this point.
       @return -1 error
               0 ok, don't sleep
               1 ok, sleep
    */
    int ReplSource::sync_pullOpLog(int& nApplied) {
        int okResultCode = 1;
        string ns = string("local.oplog.$") + sourceName();
        log(2) << "repl: sync_pullOpLog " << ns << " syncedTo:" << syncedTo.toStringLong() << '\n';

        bool tailing = true;
        oplogReader.tailCheck();

        bool initial = syncedTo.isNull();

        if ( !oplogReader.haveCursor() || initial ) {
            if ( initial ) {
                // Important to grab last oplog timestamp before listing databases.
                syncToTailOfRemoteLog();
                BSONObj info;
                bool ok = oplogReader.conn()->runCommand( "admin", BSON( "listDatabases" << 1 ), info );
                massert( 10389 ,  "Unable to get database list", ok );
                BSONObjIterator i( info.getField( "databases" ).embeddedObject() );
                while( i.moreWithEOO() ) {
                    BSONElement e = i.next();
                    if ( e.eoo() )
                        break;
                    string name = e.embeddedObject().getField( "name" ).valuestr();
                    if ( !e.embeddedObject().getBoolField( "empty" ) ) {
                        if ( name != "local" ) {
                            if ( only.empty() || only == name ) {
                                log( 2 ) << "adding to 'addDbNextPass': "******"$gte", syncedTo.asDate());
            BSONObjBuilder query;
            query.append("ts", q.done());
            if ( !only.empty() ) {
                // note we may here skip a LOT of data table scanning, a lot of work for the master.
                query.appendRegex("ns", string("^") + only); // maybe append "\\." here?
            }
            BSONObj queryObj = query.done();
            // e.g. queryObj = { ts: { $gte: syncedTo } }

            oplogReader.tailingQuery(ns.c_str(), queryObj);
            tailing = false;
        }
        else {
            log(2) << "repl: tailing=true\n";
        }

        if( !oplogReader.haveCursor() ) {
            problem() << "repl: dbclient::query returns null (conn closed?)" << endl;
            oplogReader.resetConnection();
            return -1;
        }

        // show any deferred database creates from a previous pass
        {
            set<string>::iterator i = addDbNextPass.begin();
            if ( i != addDbNextPass.end() ) {
                BSONObjBuilder b;
                b.append("ns", *i + '.');
                b.append("op", "db");
                BSONObj op = b.done();
                sync_pullOpLog_applyOperation(op, false);
            }
        }

        if ( !oplogReader.more() ) {
            if ( tailing ) {
                log(2) << "repl: tailing & no new activity\n";
                if( oplogReader.awaitCapable() )
                    okResultCode = 0; // don't sleep

            }
            else {
                log() << "repl:   " << ns << " oplog is empty\n";
            }
            {
                dblock lk;
                save();
            }
            return okResultCode;
        }

        OpTime nextOpTime;
        {
            BSONObj op = oplogReader.next();
            BSONElement ts = op.getField("ts");
            if ( ts.type() != Date && ts.type() != Timestamp ) {
                string err = op.getStringField("$err");
                if ( !err.empty() ) {
                    // 13051 is "tailable cursor requested on non capped collection"
                    if (op.getIntField("code") == 13051) {
                        problem() << "trying to slave off of a non-master" << '\n';
                        massert( 13344 ,  "trying to slave off of a non-master", false );
                    }
                    else {
                        problem() << "repl: $err reading remote oplog: " + err << '\n';
                        massert( 10390 ,  "got $err reading remote oplog", false );
                    }
                }
                else {
                    problem() << "repl: bad object read from remote oplog: " << op.toString() << '\n';
                    massert( 10391 , "repl: bad object read from remote oplog", false);
                }
            }

            nextOpTime = OpTime( ts.date() );
            log(2) << "repl: first op time received: " << nextOpTime.toString() << '\n';
            if ( initial ) {
                log(1) << "repl:   initial run\n";
            }
            if( tailing ) {
                if( !( syncedTo < nextOpTime ) ) {
                    log() << "repl ASSERTION failed : syncedTo < nextOpTime" << endl;
                    log() << "repl syncTo:     " << syncedTo.toStringLong() << endl;
                    log() << "repl nextOpTime: " << nextOpTime.toStringLong() << endl;
                    assert(false);
                }
                oplogReader.putBack( op ); // op will be processed in the loop below
                nextOpTime = OpTime(); // will reread the op below
            }
            else if ( nextOpTime != syncedTo ) { // didn't get what we queried for - error
                Nullstream& l = log();
                l << "repl:   nextOpTime " << nextOpTime.toStringLong() << ' ';
                if ( nextOpTime < syncedTo )
                    l << "<??";
                else
                    l << ">";

                l << " syncedTo " << syncedTo.toStringLong() << '\n';
                log() << "repl:   time diff: " << (nextOpTime.getSecs() - syncedTo.getSecs()) << "sec\n";
                log() << "repl:   tailing: " << tailing << '\n';
                log() << "repl:   data too stale, halting replication" << endl;
                replInfo = replAllDead = "data too stale halted replication";
                assert( syncedTo < nextOpTime );
                throw SyncException();
            }
            else {
                /* t == syncedTo, so the first op was applied previously or it is the first op of initial query and need not be applied. */
            }
        }

        // apply operations
        {
            int n = 0;
            time_t saveLast = time(0);
            while ( 1 ) {

                bool moreInitialSyncsPending = !addDbNextPass.empty() && n; // we need "&& n" to assure we actually process at least one op to get a sync point recorded in the first place.

                if ( moreInitialSyncsPending || !oplogReader.more() ) {
                    dblock lk;

                    // NOTE aaron 2011-03-29 This block may be unnecessary, but I'm leaving it in place to avoid changing timing behavior.
                    {
                        dbtemprelease t;
                        if ( !moreInitialSyncsPending && oplogReader.more() ) {
                            continue;
                        }
                        // otherwise, break out of loop so we can set to completed or clone more dbs
                    }
                    
                    if( oplogReader.awaitCapable() && tailing )
                        okResultCode = 0; // don't sleep
                    syncedTo = nextOpTime;
                    save(); // note how far we are synced up to now
                    log() << "repl:   applied " << n << " operations" << endl;
                    nApplied = n;
                    log() << "repl:  end sync_pullOpLog syncedTo: " << syncedTo.toStringLong() << endl;
                    break;
                }
                else {
                }

                OCCASIONALLY if( n > 0 && ( n > 100000 || time(0) - saveLast > 60 ) ) {
                    // periodically note our progress, in case we are doing a lot of work and crash
                    dblock lk;
                    syncedTo = nextOpTime;
                    // can't update local log ts since there are pending operations from our peer
                    save();
                    log() << "repl:   checkpoint applied " << n << " operations" << endl;
                    log() << "repl:   syncedTo: " << syncedTo.toStringLong() << endl;
                    saveLast = time(0);
                    n = 0;
                }

                BSONObj op = oplogReader.next();

                unsigned b = replApplyBatchSize;
                bool justOne = b == 1;
                scoped_ptr<writelock> lk( justOne ? 0 : new writelock() );
                while( 1 ) {

                    BSONElement ts = op.getField("ts");
                    if( !( ts.type() == Date || ts.type() == Timestamp ) ) {
                        log() << "sync error: problem querying remote oplog record" << endl;
                        log() << "op: " << op.toString() << endl;
                        log() << "halting replication" << endl;
                        replInfo = replAllDead = "sync error: no ts found querying remote oplog record";
                        throw SyncException();
                    }
                    OpTime last = nextOpTime;
                    nextOpTime = OpTime( ts.date() );
                    if ( !( last < nextOpTime ) ) {
                        log() << "sync error: last applied optime at slave >= nextOpTime from master" << endl;
                        log() << " last:       " << last.toStringLong() << endl;
                        log() << " nextOpTime: " << nextOpTime.toStringLong() << endl;
                        log() << " halting replication" << endl;
                        replInfo = replAllDead = "sync error last >= nextOpTime";
                        uassert( 10123 , "replication error last applied optime at slave >= nextOpTime from master", false);
                    }
                    if ( replSettings.slavedelay && ( unsigned( time( 0 ) ) < nextOpTime.getSecs() + replSettings.slavedelay ) ) {
                        assert( justOne );
                        oplogReader.putBack( op );
                        _sleepAdviceTime = nextOpTime.getSecs() + replSettings.slavedelay + 1;
                        dblock lk;
                        if ( n > 0 ) {
                            syncedTo = last;
                            save();
                        }
                        log() << "repl:   applied " << n << " operations" << endl;
                        log() << "repl:   syncedTo: " << syncedTo.toStringLong() << endl;
                        log() << "waiting until: " << _sleepAdviceTime << " to continue" << endl;
                        return okResultCode;
                    }

                    sync_pullOpLog_applyOperation(op, !justOne);
                    n++;

                    if( --b == 0 )
                        break;
                    // if we get here, we are doing multiple applications in a single write lock acquisition
                    if( !oplogReader.moreInCurrentBatch() ) {
                        // break if no more in batch so we release lock while reading from the master
                        break;
                    }
                    op = oplogReader.next();

                    getDur().commitIfNeeded();
                }
            }
        }

        return okResultCode;
    }
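Illustrative only: a hypothetical driver loop showing how the return convention documented above (-1 error, 0 ok/don't sleep, 1 ok/sleep) could be consumed; pullLoop() and the one-second back-off are assumptions, not code from the project.

    void pullLoop(ReplSource& source) {
        while ( true ) {
            int nApplied = 0;
            int rc = source.sync_pullOpLog(nApplied);
            if ( rc < 0 )
                break;            // error: reconnect/resync is handled elsewhere
            if ( rc == 1 )
                sleepsecs(1);     // nothing new and the cursor cannot block; back off briefly
            // rc == 0: more work is likely pending, poll again immediately
        }
    }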
Example #5
File: repl.cpp Project: CoolCloud/mongo
    /* local.$oplog.main is of the form:
         { ts: ..., op: <optype>, ns: ..., o: <obj> , o2: <extraobj>, b: <boolflag> }
         ...
       see logOp() comments.

       @param alreadyLocked caller already put us in write lock if true
    */
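    /* illustrative only -- a made-up entry in that shape, for an insert into test.foo:
         { ts: Timestamp(1302051840, 1), op: "i", ns: "test.foo", o: { _id: 1, x: 2 } }
       common <optype> values: 'i' insert, 'u' update, 'd' delete, 'c' command, 'n' no-op, "db" declares a database.
    */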
    void ReplSource::sync_pullOpLog_applyOperation(BSONObj& op, bool alreadyLocked) {
        if( logLevel >= 6 ) // op.tostring is expensive so doing this check explicitly
            log(6) << "processing op: " << op << endl;

        if( op.getStringField("op")[0] == 'n' )
            return;

        char clientName[MaxDatabaseNameLen];
        const char *ns = op.getStringField("ns");
        nsToDatabase(ns, clientName);

        if ( *ns == '.' ) {
            problem() << "skipping bad op in oplog: " << op.toString() << endl;
            return;
        }
        else if ( *ns == 0 ) {
            /*if( op.getStringField("op")[0] != 'n' )*/ {
                problem() << "halting replication, bad op in oplog:\n  " << op.toString() << endl;
                replAllDead = "bad object in oplog";
                throw SyncException();
            }
            //ns = "local.system.x";
            //nsToDatabase(ns, clientName);
        }

        if ( !only.empty() && only != clientName )
            return;

        if( cmdLine.pretouch && !alreadyLocked/*doesn't make sense if in write lock already*/ ) {
            if( cmdLine.pretouch > 1 ) {
                /* note: this is bad - should be put in ReplSource.  but this is first test... */
                static int countdown;
                assert( countdown >= 0 );
                if( countdown > 0 ) {
                    countdown--; // was pretouched on a prev pass
                }
                else {
                    const int m = 4;
                    if( tp.get() == 0 ) {
                        int nthr = min(8, cmdLine.pretouch);
                        nthr = max(nthr, 1);
                        tp.reset( new ThreadPool(nthr) );
                    }
                    vector<BSONObj> v;
                    oplogReader.peek(v, cmdLine.pretouch);
                    unsigned a = 0;
                    while( 1 ) {
                        if( a >= v.size() ) break;
                        unsigned b = a + m - 1; // v[a..b]
                        if( b >= v.size() ) b = v.size() - 1;
                        tp->schedule(pretouchN, v, a, b);
                        DEV cout << "pretouch task: " << a << ".." << b << endl;
                        a += m;
                    }
                    // we do one too...
                    pretouchOperation(op);
                    tp->join();
                    countdown = v.size();
                }
            }
            else {
                pretouchOperation(op);
            }
        }

        scoped_ptr<writelock> lk( alreadyLocked ? 0 : new writelock() );

        if ( replAllDead ) {
            // hmmm why is this check here and not at top of this function? does it get set between top and here?
            log() << "replAllDead, throwing SyncException: " << replAllDead << endl;
            throw SyncException();
        }

        Client::Context ctx( ns );
        ctx.getClient()->curop()->reset();

        bool empty = ctx.db()->isEmpty();
        bool incompleteClone = incompleteCloneDbs.count( clientName ) != 0;

        if( logLevel >= 6 )
            log(6) << "ns: " << ns << ", justCreated: " << ctx.justCreated() << ", empty: " << empty << ", incompleteClone: " << incompleteClone << endl;

        // always apply commands against the admin database
        // this is a bit hacky -- the semantics of replication/commands aren't well specified
        if ( strcmp( clientName, "admin" ) == 0 && *op.getStringField( "op" ) == 'c' ) {
            applyOperation( op );
            return;
        }

        if ( ctx.justCreated() || empty || incompleteClone ) {
            // we must add to incomplete list now that setClient has been called
            incompleteCloneDbs.insert( clientName );
            if ( nClonedThisPass ) {
                /* we only clone one database per pass, even if a lot need to be done.  This helps us
                 avoid overflowing the master's transaction log by doing too much work before going
                 back to read more transactions. (Imagine a scenario of slave startup where we try to
                 clone 100 databases in one pass.)
                 */
                addDbNextPass.insert( clientName );
            }
            else {
                if ( incompleteClone ) {
                    log() << "An earlier initial clone of '" << clientName << "' did not complete, now resyncing." << endl;
                }
                save();
                Client::Context ctx(ns);
                nClonedThisPass++;
                resync(ctx.db()->name);
                addDbNextPass.erase(clientName);
                incompleteCloneDbs.erase( clientName );
            }
            save();
        }
        else {
            applyOperation( op );
            addDbNextPass.erase( clientName );
        }
    }