Example #1
0
 /* Walk the member list starting at rs->head() (members OTHER THAN US) and
    look for one that reports itself primary while its heartbeat info says it
    is up.

    Returns that member, or 0 when there is none.  When a member is found it
    is also handed to noteARemoteIsPrimary() before returning.

    Throws the string literal "twomasters" (a const char*) as soon as a second
    such member is seen; noteARemoteIsPrimary() is not called in that case. */
 const Member * Manager::findOtherPrimary() { 
     Member *found = 0;
     for( Member *cur = rs->head(); cur; cur = cur->next() ) {
         // skip anyone who is not claiming primary, or whose heartbeat is not up
         if( !cur->state().primary() || !cur->hbinfo().up() )
             continue;
         // our polling is asynchronous, so seeing two at once is often ok.
         if( found ) throw "twomasters";
         found = cur;
     }
     if( found ) 
         noteARemoteIsPrimary(found);
     return found;
 }
Example #2
0
 /* Walk the member list starting at rs->head() (members OTHER THAN US) and
    look for one that reports itself primary while its heartbeat info says it
    is up.

    two : out-parameter.  Reset to false on entry; set to true when a second
          such member is encountered.

    Returns the single member found (after passing it to
    noteARemoteIsPrimary()), or 0 when there is none.  Also returns 0 when
    `two` is set, in which case noteARemoteIsPrimary() is not called. */
 const Member * Manager::findOtherPrimary(bool& two) {
     two = false;
     Member *candidate = 0;
     for( Member *cur = rs->head(); cur; cur = cur->next() ) {
         // the list being walked is not supposed to contain ourselves
         DEV verify( cur != rs->_self );
         const bool claimsPrimary = cur->state().primary() && cur->hbinfo().up();
         if( !claimsPrimary )
             continue;
         if( candidate ) {
             // a second claimant: report it through `two` and give up
             two = true;
             return 0;
         }
         candidate = cur;
     }
     if( candidate )
         noteARemoteIsPrimary(candidate);
     return candidate;
 }
Example #3
0
    /** called as the health threads get new results

        Re-evaluates who is primary and, when nobody is, decides whether this
        node should try to elect itself.  In order:
          1. bail out if an electSelf() is already in flight (busyWithElectSelf).
          2. forget a remembered remote primary whose heartbeat info no longer
             says it is up and primary.
          3. if another member claims primary, note it and return; if two do,
             log and return.
          4. if we are primary ourselves, relinquish when
             rs->elect.shouldRelinquish() says so, then return.
          5. otherwise, if we may be primary at all, a majority seems to be up
             and we are electable, call rs->elect.electSelf().

        Steps 1-4 and the checks in step 5 run while an RSBase::lock on rs is
        in scope; electSelf() is called after that scope has ended.
    */
    void Manager::msgCheckNewState() {
        {
            theReplSet->assertValid();
            rs->assertValid();

            // lk lives until the closing brace of this inner block
            RSBase::lock lk(rs);

            // the flag is set just before electSelf() below and cleared after it
            if( busyWithElectSelf ) return;
            
            checkElectableSet();
            checkAuth();

            // p: who rs->box currently records as primary (may be 0, us, or a remote)
            const Member *p = rs->box.getPrimary();
            if( p && p != rs->_self ) {
                // a remote primary whose heartbeat info says it is down or no
                // longer primary is dropped, both locally and in rs->box
                if( !p->hbinfo().up() ||
                        !p->hbinfo().hbstate.primary() ) {
                    p = 0;
                    rs->box.setOtherPrimary(0);
                }
            }

            // p2: a member other than us that currently claims to be primary, if any
            const Member *p2;
            {
                bool two;
                p2 = findOtherPrimary(two);
                if( two ) {
                    /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */
                    log() << "replSet info two primaries (transiently)" << rsLog;
                    return;
                }
            }

            if( p2 ) {
                // exactly one other member claims primary: record it and stop here
                noteARemoteIsPrimary(p2);
                return;
            }

            /* didn't find anyone who wants to be primary */

            if( p ) {
                /* we are already primary */

                if( p != rs->_self ) {
                    // rs->box still names a remote even though no other member
                    // was found claiming primary: record an error message and
                    // log both names
                    rs->sethbmsg("error p != rs->self in checkNewState");
                    log() << "replSet " << p->fullName() << rsLog;
                    log() << "replSet " << rs->_self->fullName() << rsLog;
                    return;
                }

                if( rs->elect.shouldRelinquish() ) {
                    log() << "can't see a majority of the set, relinquishing primary" << rsLog;
                    rs->relinquish();
                }

                return;
            }

            if( !rs->iAmPotentiallyHot() ) { // if not we never try to be primary
                OCCASIONALLY log() << "replSet I don't see a primary and I can't elect myself" << endl;
                return;
            }

            /* no one seems to be primary.  shall we try to elect ourself? */
            if( !rs->elect.aMajoritySeemsToBeUp() ) {
                // function-local statics: they persist across calls.
                // last = time() of the previous pass through this branch,
                // n    = number of passes through this branch so far.
                static time_t last;
                static int n;
                // ll is the level handed to log(): +1 once this branch has been
                // hit more than 5 times, +1 more if it was last hit less than
                // 60 seconds ago.
                // NOTE(review): presumably a higher level makes the message
                // less likely to be printed -- confirm against log(int).
                int ll = 0;
                if( ++n > 5 ) ll++;
                if( last + 60 > time(0 ) ) ll++;
                log(ll) << "replSet can't see a majority, will not try to elect self" << rsLog;
                last = time(0);
                return;
            }

            if( !rs->iAmElectable() ) {
                return;
            }

            busyWithElectSelf = true; // don't try to do further elections & such while we are already working on one.
        }
        // lk is out of scope from here on; electSelf() runs without it
        try {
            rs->elect.electSelf();
        }
        catch(RetryAfterSleepException&) {
            /* we want to process new inbounds before trying this again.  so we just put a checkNewstate in the queue for eval later. */
            requeue();
        }
        catch(...) {
            // any other exception is logged and swallowed
            log() << "replSet error unexpected assertion in rs manager" << rsLog;
        }
        busyWithElectSelf = false;
    }
Example #4
0
    /** called as the health threads get new results

        Re-evaluates who is primary and, when nobody is, decides whether this
        node should try to elect itself.  In order:
          1. bail out if an electSelf() is already in flight (busyWithElectSelf).
          2. forget a remembered remote primary whose heartbeat info no longer
             says it is up and primary.
          3. if another member claims primary, reconcile that claim with what
             we currently believe (see the p / p2 cases below) and return; if
             two others claim it, log and return.
          4. if we are primary ourselves, relinquish when
             rs->elect.shouldRelinquish() says so, then return.
          5. otherwise, if we may be primary at all, a majority seems to be up
             and we are electable, call rs->elect.electSelf().

        Steps 1-4 and the checks in step 5 run while an RSBase::lock on rs is
        in scope; electSelf() is called after that scope has ended.
    */
    void Manager::msgCheckNewState() {
        {
            theReplSet->assertValid();
            rs->assertValid();

            // lk lives until the closing brace of this inner block
            RSBase::lock lk(rs);

            // the flag is set just before electSelf() below and cleared after it
            if( busyWithElectSelf ) return;
            
            checkElectableSet();
            
            // p: who rs->box currently records as primary (may be 0, us, or a remote)
            const Member *p = rs->box.getPrimary();
            if( p && p != rs->_self ) {
                // a remote primary whose heartbeat info says it is down or no
                // longer primary is dropped, both locally and in rs->box
                if( !p->hbinfo().up() ||
                        !p->hbinfo().hbstate.primary() ) {
                    p = 0;
                    rs->box.setOtherPrimary(0);
                }
            }

            // p2: a member other than us that currently claims to be primary, if any
            const Member *p2;
            {
                bool two;
                p2 = findOtherPrimary(two);
                if( two ) {
                    /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */
                    log() << "replSet info two primaries (transiently)" << rsLog;
                    return;
                }
            }

            if( p2 ) {
                /* someone else thinks they are primary. */
                if( p == p2 ) {
                    // we thought the same; all set.
                    return;
                }
                if( p == 0 ) {
                    // we had no primary recorded: adopt p2
                    noteARemoteIsPrimary(p2);
                    return;
                }
                // TODO(review): the original carried an unspecified "todo" marker
                // here; what was meant to be done is not recorded.
                if( p != rs->_self ) {
                    // switch primary from oldremotep->newremotep2
                    noteARemoteIsPrimary(p2);
                    return;
                }
                /* we thought we were primary, yet now someone else thinks they are. */
                if( !rs->elect.aMajoritySeemsToBeUp() ) {
                    /* we can't see a majority.  so the other node is probably the right choice. */
                    noteARemoteIsPrimary(p2);
                    return;
                }
                /* ignore for now, keep thinking we are master.
                   this could just be timing (we poll every couple seconds) or could indicate
                   a problem?  if it happens consistently for a duration of time we should
                   alert the sysadmin.
                */
                return;
            }

            /* didn't find anyone who wants to be primary */

            if( p ) {
                /* we are already primary */

                if( p != rs->_self ) {
                    // rs->box still names a remote even though no other member
                    // was found claiming primary: record an error message and
                    // log both names
                    rs->sethbmsg("error p != rs->self in checkNewState");
                    log() << "replSet " << p->fullName() << rsLog;
                    log() << "replSet " << rs->_self->fullName() << rsLog;
                    return;
                }

                if( rs->elect.shouldRelinquish() ) {
                    log() << "can't see a majority of the set, relinquishing primary" << rsLog;
                    rs->relinquish();
                }

                return;
            }

            if( !rs->iAmPotentiallyHot() ) // if not we never try to be primary
                return;
            
            /* no one seems to be primary.  shall we try to elect ourself? */
            if( !rs->elect.aMajoritySeemsToBeUp() ) {
                // function-local statics: they persist across calls.
                // last = time() of the previous pass through this branch,
                // n    = number of passes through this branch so far.
                static time_t last;
                static int n;
                // ll is the level handed to log(): +1 once this branch has been
                // hit more than 5 times, +1 more if it was last hit less than
                // 60 seconds ago.
                // NOTE(review): presumably a higher level makes the message
                // less likely to be printed -- confirm against log(int).
                int ll = 0;
                if( ++n > 5 ) ll++;
                if( last + 60 > time(0 ) ) ll++;
                log(ll) << "replSet can't see a majority, will not try to elect self" << rsLog;
                last = time(0);
                return;
            }

            if( !rs->iAmElectable() ) {
                return;
            }

            busyWithElectSelf = true; // don't try to do further elections & such while we are already working on one.
        }
        // lk is out of scope from here on; electSelf() runs without it
        try {
            rs->elect.electSelf();
        }
        catch(RetryAfterSleepException&) {
            /* we want to process new inbounds before trying this again.  so we just put a checkNewstate in the queue for eval later. */
            requeue();
        }
        catch(...) {
            // any other exception is logged and swallowed
            log() << "replSet error unexpected assertion in rs manager" << rsLog;
        }
        busyWithElectSelf = false;
    }
Example #5
0
/** called as the health threads get new results

    Re-evaluates who is primary and, when nobody is, decides whether this
    node should try to elect itself.  In order:
      1. bail out if an electSelf() is already in flight (busyWithElectSelf).
      2. run checkAuth(); if it reports an issue, skip everything below,
         relinquish if we are primary, call rs->blockSync(true) and return.
      3. forget a remembered remote primary whose heartbeat info no longer
         says it is up and primary.
      4. if another member claims primary, note it and return; if two do,
         log and return.
      5. if we are primary ourselves, relinquish when
         rs->elect.shouldRelinquish() says so, or when GTID::cmp() puts our
         live GTID behind theReplSet->lastOtherGTID(); then return.
      6. otherwise, if we may be primary at all, a majority seems to be up
         and we are electable, call rs->elect.electSelf().

    Two scopes are involved: a boost::unique_lock on rs->stateChangeMutex
    covers the whole outer block (including the auth-issue handling), and an
    RSBase::lock on rs covers the inner decision block.  electSelf() is
    called after both scopes have ended.
*/
void Manager::msgCheckNewState() {
    bool authIssue = false;
    {
        theReplSet->assertValid();
        rs->assertValid();

        // in scope until the closing brace of this outer block
        boost::unique_lock<boost::mutex> lock(rs->stateChangeMutex);
        {
            // lk lives until the closing brace of this inner block
            RSBase::lock lk(rs);

            // the flag is set just before electSelf() below and cleared after it
            if( busyWithElectSelf ) return;

            checkElectableSet();
            authIssue = checkAuth();
            if (!authIssue) {
                // p: who rs->box currently records as primary (may be 0, us, or a remote)
                const Member *p = rs->box.getPrimary();
                if( p && p != rs->_self ) {
                    // a remote primary whose heartbeat info says it is down or
                    // no longer primary is dropped, both locally and in rs->box
                    if( !p->hbinfo().up() ||
                            !p->hbinfo().hbstate.primary() ) {
                        p = 0;
                        rs->box.setOtherPrimary(0);
                    }
                }

                // p2: a member other than us that currently claims to be primary, if any
                const Member *p2;
                {
                    bool two;
                    p2 = findOtherPrimary(two);
                    if( two ) {
                        /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */
                        log() << "replSet info two primaries (transiently)" << rsLog;
                        return;
                    }
                }

                if( p2 ) {
                    // exactly one other member claims primary: record it and stop here
                    noteARemoteIsPrimary(p2);
                    return;
                }

                /* didn't find anyone who wants to be primary */

                if( p ) {
                    /* we are already primary */

                    if( p != rs->_self ) {
                        // rs->box still names a remote even though no other
                        // member was found claiming primary: record an error
                        // message and log both names
                        rs->sethbmsg("error p != rs->self in checkNewState");
                        log() << "replSet " << p->fullName() << rsLog;
                        log() << "replSet " << rs->_self->fullName() << rsLog;
                        return;
                    }

                    if( rs->elect.shouldRelinquish() ) {
                        log() << "can't see a majority of the set, relinquishing primary" << rsLog;
                        rs->relinquish();
                    }

                    // NOTE(review): this check is not skipped when the branch
                    // above already relinquished, so rs->relinquish() can be
                    // called twice in one pass -- confirm that is harmless.
                    if (GTID::cmp(theReplSet->gtidManager->getLiveState(), theReplSet->lastOtherGTID()) < 0) {
                        // this can happen if we transiently have two primaries, which can
                        // happen if a primary loses contact with the replica set,
                        // triggering an election, but it connects back before it has a
                        // chance to step down
                        log() << "we see a secondary that is ahead, relinquishing primary" << rsLog;
                        rs->relinquish();
                    }

                    return;
                }

                if( !rs->iAmPotentiallyHot() ) { // if not we never try to be primary
                    OCCASIONALLY log() << "replSet I don't see a primary and I can't elect myself" << endl;
                    return;
                }

                /* no one seems to be primary.  shall we try to elect ourself? */
                if( !rs->elect.aMajoritySeemsToBeUp() ) {
                    // function-local statics: they persist across calls.
                    // last = time() of the previous pass through this branch,
                    // n    = number of passes through this branch so far.
                    static time_t last;
                    static int n;
                    // ll is the level handed to LOG(): +1 once this branch has
                    // been hit more than 5 times, +1 more if it was last hit
                    // less than 60 seconds ago.
                    // NOTE(review): presumably a higher level makes the message
                    // less likely to be printed -- confirm against LOG(int).
                    int ll = 0;
                    if( ++n > 5 ) ll++;
                    if( last + 60 > time(0 ) ) ll++;
                    LOG(ll) << "replSet can't see a majority, will not try to elect self" << rsLog;
                    last = time(0);
                    return;
                }

                if( !rs->iAmElectable() ) {
                    return;
                }

                busyWithElectSelf = true; // don't try to do further elections & such while we are already working on one.
            }
        }
        // blockSync outside of rslock
        // can't hold rslock because we may try to stop the opsync thread
        if (authIssue) {
            {
                // a fresh RSBase::lock, scoped to the relinquish check only
                RSBase::lock lk(rs);
                if (rs->box.getPrimary() == rs->_self) {
                    log() << "auth problems, relinquishing primary" << rsLog;
                    rs->relinquish();
                }
            }
            // called with the stateChangeMutex lock object still in scope but
            // no RSBase::lock in scope
            rs->blockSync(true);
            return;
        }
    }
    // both lock objects are out of scope from here on.  This point is reached
    // only with busyWithElectSelf == true: every other path above returned.
    try {
        rs->elect.electSelf();
    }
    catch(RetryAfterSleepException&) {
        /* we want to process new inbounds before trying this again.  so we just put a checkNewstate in the queue for eval later. */
        requeue();
    }
    catch(...) {
        // any other exception is logged and swallowed
        log() << "replSet error unexpected assertion in rs manager" << rsLog;
    }
    busyWithElectSelf = false;
}