/**
 * Record that the transporter to @p nodeId is now connected.
 *
 * Resets the heartbeat bookkeeping and the per-node state for the node,
 * marks it connected and then posts a GSN_CONNECT_REP signal to our own
 * API_CLUSTERMGR block so the remaining work is done when that signal
 * is received.
 *
 * @param nodeId  id of the node that connected; must be in the range
 *                1 .. MAX_NODES-1 (asserted).
 */
void
ClusterMgr::reportConnected(NodeId nodeId)
{
  DBUG_ENTER("ClusterMgr::reportConnected");
  DBUG_PRINT("info", ("nodeId: %u", nodeId));

  assert(nodeId > 0 && nodeId < MAX_NODES);

  const Uint32 ownNodeId = getOwnNodeId();

  /* Our own node is not included in the connected-node count */
  if (nodeId != ownNodeId)
  {
    noOfConnectedNodes++;
  }

  Node & connectedNode = theNodes[nodeId];
  trp_node & trpNode = connectedNode;

  assert(!trpNode.is_connected());

  /**
   * Restart the heartbeat accounting from scratch.
   * Original note: ensure that we are sending heartbeat every 100 ms
   * until we have got the first reply from NDB providing
   * us with the real time-out period to use.
   */
  connectedNode.hbMissed = 0;
  connectedNode.hbCounter = 0;
  connectedNode.hbFrequency = 0;

  /**
   * Mark the node itself as connected right away, even though
   * the first API_REGCONF has not arrived yet
   */
  trpNode.set_connected(true);
  trpNode.m_state.m_connected_nodes.set(nodeId);
  trpNode.m_state.startLevel = NodeState::SL_NOTHING;
  trpNode.m_info.m_version = 0;
  trpNode.minDbVersion = 0;
  trpNode.compatible = true;
  trpNode.nfCompleteRep = true;
  trpNode.m_node_fail_rep = false;

  /**
   * We hold clusterMgrThreadMutex and trp_client::mutex here, but we
   * cannot tell whether we are the poller, and for_each may only be
   * used by a poller.
   *
   * So send a signal to ourself and do that part when it is received.
   */
  NdbApiSignal connectRep(numberToRef(API_CLUSTERMGR, ownNodeId));
  connectRep.theVerId_signalNumber = GSN_CONNECT_REP;
  connectRep.theReceiversBlockNumber = API_CLUSTERMGR;
  connectRep.theTrace = 0;
  connectRep.theLength = 1;
  connectRep.getDataPtrSend()[0] = nodeId;
  raw_sendSignal(&connectRep, ownNodeId);

  DBUG_VOID_RETURN;
}
/**
 * Send a SimpleSignal (header plus its sections) to @p nodeId.
 *
 * @param nodeId  destination node
 * @param s       signal to send; its header, section pointers and
 *                section count are handed to raw_sendSignal()
 * @return SEND_OK if raw_sendSignal() returned 0 (a force send is then
 *         requested), otherwise SEND_DISCONNECTED.
 */
SendStatus
SignalSender::sendSignal(Uint16 nodeId, const SimpleSignal * s)
{
  const int sendResult = raw_sendSignal((NdbApiSignal*)&s->header,
                                        nodeId,
                                        s->ptr,
                                        s->header.m_noOfSections);
  if (sendResult != 0)
  {
    /* Any non-zero result is reported as a disconnect */
    return SEND_DISCONNECTED;
  }

  do_forceSend();
  return SEND_OK;
}
/**
 * Send a signal without sections to @p nodeId.
 *
 * @param signal  signal to send
 * @param nodeId  destination node
 * @return -1 if getIsNodeSendable() rejects the node, otherwise the
 *         result of raw_sendSignal().
 */
inline
int
NdbImpl::sendSignal(NdbApiSignal * signal, Uint32 nodeId)
{
  if (!getIsNodeSendable(nodeId))
  {
    return -1;
  }

  /* Account the signal payload (length is in words, stat is in bytes) */
  if (likely(recordGSN(signal->theVerId_signalNumber)))
  {
    incClientStat(Ndb::BytesSentCount, signal->getLength() << 2);
  }

  return raw_sendSignal(signal, nodeId);
}
/**
 * Send a signal with up to three generic sections to @p nodeId.
 *
 * @param signal  signal to send
 * @param nodeId  destination node
 * @param ptr     section descriptors; only the first @p secs (at most 3)
 *                entries are read for the byte statistics
 * @param secs    number of sections attached
 * @return -1 if getIsNodeSendable() rejects the node, otherwise the
 *         result of raw_sendSignal().
 */
inline
int
NdbImpl::sendSignal(NdbApiSignal * signal, Uint32 nodeId,
                    const GenericSectionPtr ptr[3], Uint32 secs)
{
  if (!getIsNodeSendable(nodeId))
  {
    return -1;
  }

  if (likely(recordGSN(signal->theVerId_signalNumber)))
  {
    /* Sizes are in words; shift by 2 to count bytes */
    Uint32 bytesSent = signal->getLength() << 2;
    for (Uint32 secNo = 0; secNo < secs && secNo < 3; secNo++)
    {
      bytesSent += ptr[secNo].sz << 2;
    }
    incClientStat(Ndb::BytesSentCount, bytesSent);
  }

  return raw_sendSignal(signal, nodeId, ptr, secs);
}
/**
 * Report that the transporter to @p nodeId has been disconnected.
 *
 * Nothing is updated here directly; a GSN_DISCONNECT_REP signal
 * carrying the node id (and error 0) is posted to our own
 * API_CLUSTERMGR block instead.
 *
 * @param nodeId  id of the disconnected node; must be in the range
 *                1 .. MAX_NODES-1 (asserted).
 */
void
ClusterMgr::reportDisconnected(NodeId nodeId)
{
  assert(nodeId > 0 && nodeId < MAX_NODES);
  assert(noOfConnectedNodes > 0);

  const Uint32 ownNodeId = getOwnNodeId();

  /**
   * We hold the trp_client lock here, but we cannot tell whether we
   * are the poller, and for_each may only be used by a poller.
   *
   * So send a signal to ourself and do the work when it is received.
   */
  NdbApiSignal disconnectSignal(numberToRef(API_CLUSTERMGR, ownNodeId));
  disconnectSignal.theVerId_signalNumber = GSN_DISCONNECT_REP;
  disconnectSignal.theReceiversBlockNumber = API_CLUSTERMGR;
  disconnectSignal.theTrace = 0;
  disconnectSignal.theLength = DisconnectRep::SignalLength;

  DisconnectRep * disconnectRep =
    CAST_PTR(DisconnectRep, disconnectSignal.getDataPtrSend());
  disconnectRep->nodeId = nodeId;
  disconnectRep->err = 0;

  raw_sendSignal(&disconnectSignal, ownNodeId);
}
void ClusterMgr::threadMain() { startup(); NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId())); signal.theVerId_signalNumber = GSN_API_REGREQ; signal.theTrace = 0; signal.theLength = ApiRegReq::SignalLength; ApiRegReq * req = CAST_PTR(ApiRegReq, signal.getDataPtrSend()); req->ref = numberToRef(API_CLUSTERMGR, theFacade.ownId()); req->version = NDB_VERSION; req->mysql_version = NDB_MYSQL_VERSION_D; NdbApiSignal nodeFail_signal(numberToRef(API_CLUSTERMGR, getOwnNodeId())); nodeFail_signal.theVerId_signalNumber = GSN_NODE_FAILREP; nodeFail_signal.theReceiversBlockNumber = API_CLUSTERMGR; nodeFail_signal.theTrace = 0; nodeFail_signal.theLength = NodeFailRep::SignalLengthLong; NDB_TICKS now = NdbTick_getCurrentTicks(); while(!theStop) { /* Sleep 1/5 of minHeartBeatInterval between each check */ const NDB_TICKS before = now; for (Uint32 i = 0; i<5; i++) { NdbSleep_MilliSleep(minHeartBeatInterval/5); { /** * start_poll does lock the trp_client and complete_poll * releases this lock. This means that this protects * against concurrent calls to send signals in ArbitMgr. * We do however need to protect also against concurrent * close in doStop, so to avoid this problem we need to * also lock clusterMgrThreadMutex before we start the * poll. 
*/ Guard g(clusterMgrThreadMutex); start_poll(); do_poll(0); complete_poll(); } } now = NdbTick_getCurrentTicks(); const Uint32 timeSlept = (Uint32)NdbTick_Elapsed(before, now).milliSec(); lock(); if (m_cluster_state == CS_waiting_for_clean_cache && theFacade.m_globalDictCache) { if (!global_flag_skip_waiting_for_clean_cache) { theFacade.m_globalDictCache->lock(); unsigned sz= theFacade.m_globalDictCache->get_size(); theFacade.m_globalDictCache->unlock(); if (sz) { unlock(); continue; } } m_cluster_state = CS_waiting_for_first_connect; } NodeFailRep * nodeFailRep = CAST_PTR(NodeFailRep, nodeFail_signal.getDataPtrSend()); nodeFailRep->noOfNodes = 0; NodeBitmask::clear(nodeFailRep->theAllNodes); for (int i = 1; i < MAX_NODES; i++) { /** * Send register request (heartbeat) to all available nodes * at specified timing intervals */ const NodeId nodeId = i; // Check array bounds + don't allow node 0 to be touched assert(nodeId > 0 && nodeId < MAX_NODES); Node & cm_node = theNodes[nodeId]; trp_node & theNode = cm_node; if (!theNode.defined) continue; if (theNode.is_connected() == false) { theFacade.doConnect(nodeId); continue; } if (!theNode.compatible) { continue; } if (nodeId == getOwnNodeId()) { /** * Don't send HB to self more than once * (once needed to avoid weird special cases in e.g ConfigManager) */ if (m_sent_API_REGREQ_to_myself) { continue; } } cm_node.hbCounter += timeSlept; if (cm_node.hbCounter >= m_max_api_reg_req_interval || cm_node.hbCounter >= cm_node.hbFrequency) { /** * It is now time to send a new Heartbeat */ if (cm_node.hbCounter >= cm_node.hbFrequency) { cm_node.hbMissed++; cm_node.hbCounter = 0; } if (theNode.m_info.m_type != NodeInfo::DB) signal.theReceiversBlockNumber = API_CLUSTERMGR; else signal.theReceiversBlockNumber = QMGR; #ifdef DEBUG_REG ndbout_c("ClusterMgr: Sending API_REGREQ to node %d", (int)nodeId); #endif if (nodeId == getOwnNodeId()) { /* Set flag to ensure we only send once to ourself */ m_sent_API_REGREQ_to_myself = true; } 
raw_sendSignal(&signal, nodeId); }//if if (cm_node.hbMissed == 4 && cm_node.hbFrequency > 0) { nodeFailRep->noOfNodes++; NodeBitmask::set(nodeFailRep->theAllNodes, nodeId); } } flush_send_buffers(); unlock(); if (nodeFailRep->noOfNodes) { lock(); raw_sendSignal(&nodeFail_signal, getOwnNodeId()); flush_send_buffers(); unlock(); } } }
void ClusterMgr::threadMain() { startup(); NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId())); signal.theVerId_signalNumber = GSN_API_REGREQ; signal.theTrace = 0; signal.theLength = ApiRegReq::SignalLength; ApiRegReq * req = CAST_PTR(ApiRegReq, signal.getDataPtrSend()); req->ref = numberToRef(API_CLUSTERMGR, theFacade.ownId()); req->version = NDB_VERSION; req->mysql_version = NDB_MYSQL_VERSION_D; NdbApiSignal nodeFail_signal(numberToRef(API_CLUSTERMGR, getOwnNodeId())); nodeFail_signal.theVerId_signalNumber = GSN_NODE_FAILREP; nodeFail_signal.theReceiversBlockNumber = API_CLUSTERMGR; nodeFail_signal.theTrace = 0; nodeFail_signal.theLength = NodeFailRep::SignalLengthLong; NDB_TICKS timeSlept = 100; NDB_TICKS now = NdbTick_CurrentMillisecond(); while(!theStop) { /* Sleep at 100ms between each heartbeat check */ NDB_TICKS before = now; for (Uint32 i = 0; i<5; i++) { NdbSleep_MilliSleep(20); { Guard g(clusterMgrThreadMutex); /** * Protect from ArbitMgr sending signals while we poll */ start_poll(); do_poll(0); complete_poll(); } } now = NdbTick_CurrentMillisecond(); timeSlept = (now - before); if (m_cluster_state == CS_waiting_for_clean_cache && theFacade.m_globalDictCache) { if (!global_flag_skip_waiting_for_clean_cache) { theFacade.m_globalDictCache->lock(); unsigned sz= theFacade.m_globalDictCache->get_size(); theFacade.m_globalDictCache->unlock(); if (sz) continue; } m_cluster_state = CS_waiting_for_first_connect; } NodeFailRep * nodeFailRep = CAST_PTR(NodeFailRep, nodeFail_signal.getDataPtrSend()); nodeFailRep->noOfNodes = 0; NodeBitmask::clear(nodeFailRep->theAllNodes); lock(); for (int i = 1; i < MAX_NODES; i++){ /** * Send register request (heartbeat) to all available nodes * at specified timing intervals */ const NodeId nodeId = i; // Check array bounds + don't allow node 0 to be touched assert(nodeId > 0 && nodeId < MAX_NODES); Node & cm_node = theNodes[nodeId]; trp_node & theNode = cm_node; if (!theNode.defined) continue; if 
(theNode.is_connected() == false){ theFacade.doConnect(nodeId); continue; } if (!theNode.compatible){ continue; } if (nodeId == getOwnNodeId() && theNode.is_confirmed()) { /** * Don't send HB to self more than once * (once needed to avoid weird special cases in e.g ConfigManager) */ continue; } cm_node.hbCounter += (Uint32)timeSlept; if (cm_node.hbCounter >= m_max_api_reg_req_interval || cm_node.hbCounter >= cm_node.hbFrequency) { /** * It is now time to send a new Heartbeat */ if (cm_node.hbCounter >= cm_node.hbFrequency) { cm_node.hbMissed++; cm_node.hbCounter = 0; } if (theNode.m_info.m_type != NodeInfo::DB) signal.theReceiversBlockNumber = API_CLUSTERMGR; else signal.theReceiversBlockNumber = QMGR; #ifdef DEBUG_REG ndbout_c("ClusterMgr: Sending API_REGREQ to node %d", (int)nodeId); #endif raw_sendSignal(&signal, nodeId); }//if if (cm_node.hbMissed == 4 && cm_node.hbFrequency > 0) { nodeFailRep->noOfNodes++; NodeBitmask::set(nodeFailRep->theAllNodes, nodeId); } } flush_send_buffers(); unlock(); if (nodeFailRep->noOfNodes) { lock(); raw_sendSignal(&nodeFail_signal, getOwnNodeId()); flush_send_buffers(); unlock(); } } }
void ClusterMgr::forceHB() { theFacade.lock_poll_mutex(); if(waitingForHB) { NdbCondition_WaitTimeout(waitForHBCond, theFacade.thePollMutex, 1000); theFacade.unlock_poll_mutex(); return; } waitingForHB= true; NodeBitmask ndb_nodes; ndb_nodes.clear(); waitForHBFromNodes.clear(); for(Uint32 i = 1; i < MAX_NDB_NODES; i++) { const trp_node &node= getNodeInfo(i); if(!node.defined) continue; if(node.m_info.getType() == NodeInfo::DB) { ndb_nodes.set(i); waitForHBFromNodes.bitOR(node.m_state.m_connected_nodes); } } waitForHBFromNodes.bitAND(ndb_nodes); theFacade.unlock_poll_mutex(); #ifdef DEBUG_REG char buf[128]; ndbout << "Waiting for HB from " << waitForHBFromNodes.getText(buf) << endl; #endif NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId())); signal.theVerId_signalNumber = GSN_API_REGREQ; signal.theReceiversBlockNumber = QMGR; signal.theTrace = 0; signal.theLength = ApiRegReq::SignalLength; ApiRegReq * req = CAST_PTR(ApiRegReq, signal.getDataPtrSend()); req->ref = numberToRef(API_CLUSTERMGR, theFacade.ownId()); req->version = NDB_VERSION; req->mysql_version = NDB_MYSQL_VERSION_D; { lock(); int nodeId= 0; for(int i=0; (int) NodeBitmask::NotFound != (nodeId= waitForHBFromNodes.find(i)); i= nodeId+1) { #ifdef DEBUG_REG ndbout << "FORCE HB to " << nodeId << endl; #endif raw_sendSignal(&signal, nodeId); } flush_send_buffers(); unlock(); } /* Wait for nodes to reply - if any heartbeats was sent */ theFacade.lock_poll_mutex(); if (!waitForHBFromNodes.isclear()) NdbCondition_WaitTimeout(waitForHBCond, theFacade.thePollMutex, 1000); waitingForHB= false; #ifdef DEBUG_REG ndbout << "Still waiting for HB from " << waitForHBFromNodes.getText(buf) << endl; #endif theFacade.unlock_poll_mutex(); }