/**
 * ClusterMgr heartbeat thread main loop.
 *
 * NOTE(review): another definition of ClusterMgr::threadMain() appears later
 * in this file — almost certainly a merge/diff artifact; only one definition
 * can be compiled into a translation unit. Confirm which revision is intended.
 *
 * Responsibilities visible in this body:
 *  - Periodically polls the transporter (start_poll/do_poll/complete_poll).
 *  - Sends API_REGREQ "heartbeat" signals to every defined, connected,
 *    compatible node at its configured interval.
 *  - Triggers connect attempts for defined-but-disconnected nodes.
 *  - After 4 consecutively missed heartbeats, reports the node(s) failed by
 *    sending a NODE_FAILREP signal to ourselves.
 * Runs until theStop is set (see doStop elsewhere in the class).
 */
void ClusterMgr::threadMain()
{
  startup();

  /* Pre-built API_REGREQ (heartbeat) signal, reused every round. */
  NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId()));
  signal.theVerId_signalNumber = GSN_API_REGREQ;
  signal.theTrace = 0;
  signal.theLength = ApiRegReq::SignalLength;

  ApiRegReq * req = CAST_PTR(ApiRegReq, signal.getDataPtrSend());
  req->ref = numberToRef(API_CLUSTERMGR, theFacade.ownId());
  req->version = NDB_VERSION;
  req->mysql_version = NDB_MYSQL_VERSION_D;

  /* Pre-built NODE_FAILREP signal, addressed back to ourselves; the payload
   * (node bitmask + count) is rebuilt each iteration below. */
  NdbApiSignal nodeFail_signal(numberToRef(API_CLUSTERMGR, getOwnNodeId()));
  nodeFail_signal.theVerId_signalNumber = GSN_NODE_FAILREP;
  nodeFail_signal.theReceiversBlockNumber = API_CLUSTERMGR;
  nodeFail_signal.theTrace = 0;
  nodeFail_signal.theLength = NodeFailRep::SignalLengthLong;

  /* Monotonic tick source — immune to wall-clock adjustments. */
  NDB_TICKS now = NdbTick_getCurrentTicks();

  while (!theStop)
  {
    /* Sleep 1/5 of minHeartBeatInterval between each check */
    const NDB_TICKS before = now;
    for (Uint32 i = 0; i < 5; i++)
    {
      NdbSleep_MilliSleep(minHeartBeatInterval/5);
      {
        /**
         * start_poll does lock the trp_client and complete_poll
         * releases this lock. This means that this protects
         * against concurrent calls to send signals in ArbitMgr.
         * We do however need to protect also against concurrent
         * close in doStop, so to avoid this problem we need to
         * also lock clusterMgrThreadMutex before we start the
         * poll.
         */
        Guard g(clusterMgrThreadMutex);
        start_poll();
        do_poll(0);
        complete_poll();
      }
    }
    now = NdbTick_getCurrentTicks();
    /* Actual elapsed time, not the nominal sleep: used to advance each
     * node's heartbeat counter so oversleeping does not delay HBs. */
    const Uint32 timeSlept = (Uint32)NdbTick_Elapsed(before, now).milliSec();

    /* Everything below runs under the trp_client lock; it is released
     * (after flush_send_buffers) at the bottom of the loop body. */
    lock();
    if (m_cluster_state == CS_waiting_for_clean_cache &&
        theFacade.m_globalDictCache)
    {
      if (!global_flag_skip_waiting_for_clean_cache)
      {
        /* Hold off heartbeating until the global dictionary cache has
         * drained; re-check on the next iteration if it is non-empty. */
        theFacade.m_globalDictCache->lock();
        unsigned sz= theFacade.m_globalDictCache->get_size();
        theFacade.m_globalDictCache->unlock();
        if (sz)
        {
          unlock();   // must release before continue — lock() is at loop top
          continue;
        }
      }
      m_cluster_state = CS_waiting_for_first_connect;
    }

    /* Reset the failure report payload for this round. */
    NodeFailRep * nodeFailRep = CAST_PTR(NodeFailRep,
                                         nodeFail_signal.getDataPtrSend());
    nodeFailRep->noOfNodes = 0;
    NodeBitmask::clear(nodeFailRep->theAllNodes);

    for (int i = 1; i < MAX_NODES; i++)
    {
      /**
       * Send register request (heartbeat) to all available nodes
       * at specified timing intervals
       */
      const NodeId nodeId = i;
      // Check array bounds + don't allow node 0 to be touched
      assert(nodeId > 0 && nodeId < MAX_NODES);
      Node & cm_node = theNodes[nodeId];
      trp_node & theNode = cm_node;

      if (!theNode.defined)
        continue;

      if (theNode.is_connected() == false)
      {
        /* Defined but not connected: ask the facade to (re)connect. */
        theFacade.doConnect(nodeId);
        continue;
      }

      if (!theNode.compatible)
      {
        continue;   // version-incompatible node: no heartbeat exchange
      }

      if (nodeId == getOwnNodeId())
      {
        /**
         * Don't send HB to self more than once
         * (once needed to avoid weird special cases in e.g ConfigManager)
         */
        if (m_sent_API_REGREQ_to_myself)
        {
          continue;
        }
      }

      cm_node.hbCounter += timeSlept;
      if (cm_node.hbCounter >= m_max_api_reg_req_interval ||
          cm_node.hbCounter >= cm_node.hbFrequency)
      {
        /**
         * It is now time to send a new Heartbeat
         */
        if (cm_node.hbCounter >= cm_node.hbFrequency)
        {
          /* Count a miss now; it is cleared elsewhere when the node's
           * API_REGCONF arrives. */
          cm_node.hbMissed++;
          cm_node.hbCounter = 0;
        }

        /* Data nodes receive the REGREQ in QMGR; API/MGM nodes in their
         * own ClusterMgr block. */
        if (theNode.m_info.m_type != NodeInfo::DB)
          signal.theReceiversBlockNumber = API_CLUSTERMGR;
        else
          signal.theReceiversBlockNumber = QMGR;

#ifdef DEBUG_REG
        ndbout_c("ClusterMgr: Sending API_REGREQ to node %d", (int)nodeId);
#endif
        if (nodeId == getOwnNodeId())
        {
          /* Set flag to ensure we only send once to ourself */
          m_sent_API_REGREQ_to_myself = true;
        }
        raw_sendSignal(&signal, nodeId);
      }//if

      /* 4 missed heartbeats (and heartbeating enabled) => declare failed. */
      if (cm_node.hbMissed == 4 && cm_node.hbFrequency > 0)
      {
        nodeFailRep->noOfNodes++;
        NodeBitmask::set(nodeFailRep->theAllNodes, nodeId);
      }
    }
    flush_send_buffers();
    unlock();

    /* Deliver the failure report to ourselves, under a fresh lock scope. */
    if (nodeFailRep->noOfNodes)
    {
      lock();
      raw_sendSignal(&nodeFail_signal, getOwnNodeId());
      flush_send_buffers();
      unlock();
    }
  }
}
/**
 * ClusterMgr heartbeat thread main loop (second copy).
 *
 * NOTE(review): this is a second, older-looking revision of
 * ClusterMgr::threadMain() — another definition appears earlier in this
 * file. Two definitions of the same function cannot coexist in one
 * translation unit; this looks like a merge/diff artifact. Confirm which
 * revision is intended and remove the other.
 *
 * Differences visible versus the other copy:
 *  - Uses wall-clock NdbTick_CurrentMillisecond() for timeSlept
 *    (NOTE(review): wall clock can jump; the other copy uses monotonic
 *    ticks — presumably the later fix).
 *  - Sleeps a fixed 5 x 20 ms per round instead of minHeartBeatInterval/5.
 *  - Reads m_cluster_state / dict-cache size before taking lock().
 *  - Suppresses self-heartbeat via theNode.is_confirmed() instead of the
 *    m_sent_API_REGREQ_to_myself flag.
 */
void ClusterMgr::threadMain()
{
  startup();

  /* Pre-built API_REGREQ (heartbeat) signal, reused every round. */
  NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId()));
  signal.theVerId_signalNumber = GSN_API_REGREQ;
  signal.theTrace = 0;
  signal.theLength = ApiRegReq::SignalLength;

  ApiRegReq * req = CAST_PTR(ApiRegReq, signal.getDataPtrSend());
  req->ref = numberToRef(API_CLUSTERMGR, theFacade.ownId());
  req->version = NDB_VERSION;
  req->mysql_version = NDB_MYSQL_VERSION_D;

  /* Pre-built NODE_FAILREP signal, addressed back to ourselves; payload
   * (node bitmask + count) is rebuilt each iteration below. */
  NdbApiSignal nodeFail_signal(numberToRef(API_CLUSTERMGR, getOwnNodeId()));
  nodeFail_signal.theVerId_signalNumber = GSN_NODE_FAILREP;
  nodeFail_signal.theReceiversBlockNumber = API_CLUSTERMGR;
  nodeFail_signal.theTrace = 0;
  nodeFail_signal.theLength = NodeFailRep::SignalLengthLong;

  /* Initial assumed round duration (ms); replaced by measurement below. */
  NDB_TICKS timeSlept = 100;
  NDB_TICKS now = NdbTick_CurrentMillisecond();

  while (!theStop)
  {
    /* Sleep at 100ms between each heartbeat check */
    NDB_TICKS before = now;
    for (Uint32 i = 0; i < 5; i++)
    {
      NdbSleep_MilliSleep(20);
      {
        Guard g(clusterMgrThreadMutex);
        /**
         * Protect from ArbitMgr sending signals while we poll
         */
        start_poll();
        do_poll(0);
        complete_poll();
      }
    }
    now = NdbTick_CurrentMillisecond();
    /* Measured elapsed wall-clock ms — advances each node's HB counter. */
    timeSlept = (now - before);

    /* NOTE(review): this state check runs without lock() held, unlike the
     * other copy of this function — confirm that is safe here. */
    if (m_cluster_state == CS_waiting_for_clean_cache &&
        theFacade.m_globalDictCache)
    {
      if (!global_flag_skip_waiting_for_clean_cache)
      {
        /* Hold off heartbeating until the global dictionary cache has
         * drained; re-check on the next iteration if it is non-empty. */
        theFacade.m_globalDictCache->lock();
        unsigned sz= theFacade.m_globalDictCache->get_size();
        theFacade.m_globalDictCache->unlock();
        if (sz)
          continue;
      }
      m_cluster_state = CS_waiting_for_first_connect;
    }

    /* Reset the failure report payload for this round. */
    NodeFailRep * nodeFailRep = CAST_PTR(NodeFailRep,
                                         nodeFail_signal.getDataPtrSend());
    nodeFailRep->noOfNodes = 0;
    NodeBitmask::clear(nodeFailRep->theAllNodes);

    /* Node scan + sends run under the trp_client lock; released after
     * flush_send_buffers() below. */
    lock();
    for (int i = 1; i < MAX_NODES; i++)
    {
      /**
       * Send register request (heartbeat) to all available nodes
       * at specified timing intervals
       */
      const NodeId nodeId = i;
      // Check array bounds + don't allow node 0 to be touched
      assert(nodeId > 0 && nodeId < MAX_NODES);
      Node & cm_node = theNodes[nodeId];
      trp_node & theNode = cm_node;

      if (!theNode.defined)
        continue;

      if (theNode.is_connected() == false)
      {
        /* Defined but not connected: ask the facade to (re)connect. */
        theFacade.doConnect(nodeId);
        continue;
      }

      if (!theNode.compatible)
      {
        continue;   // version-incompatible node: no heartbeat exchange
      }

      if (nodeId == getOwnNodeId() && theNode.is_confirmed())
      {
        /**
         * Don't send HB to self more than once
         * (once needed to avoid weird special cases in e.g ConfigManager)
         */
        continue;
      }

      cm_node.hbCounter += (Uint32)timeSlept;
      if (cm_node.hbCounter >= m_max_api_reg_req_interval ||
          cm_node.hbCounter >= cm_node.hbFrequency)
      {
        /**
         * It is now time to send a new Heartbeat
         */
        if (cm_node.hbCounter >= cm_node.hbFrequency)
        {
          /* Count a miss now; it is cleared elsewhere when the node's
           * API_REGCONF arrives. */
          cm_node.hbMissed++;
          cm_node.hbCounter = 0;
        }

        /* Data nodes receive the REGREQ in QMGR; API/MGM nodes in their
         * own ClusterMgr block. */
        if (theNode.m_info.m_type != NodeInfo::DB)
          signal.theReceiversBlockNumber = API_CLUSTERMGR;
        else
          signal.theReceiversBlockNumber = QMGR;

#ifdef DEBUG_REG
        ndbout_c("ClusterMgr: Sending API_REGREQ to node %d", (int)nodeId);
#endif
        raw_sendSignal(&signal, nodeId);
      }//if

      /* 4 missed heartbeats (and heartbeating enabled) => declare failed. */
      if (cm_node.hbMissed == 4 && cm_node.hbFrequency > 0)
      {
        nodeFailRep->noOfNodes++;
        NodeBitmask::set(nodeFailRep->theAllNodes, nodeId);
      }
    }
    flush_send_buffers();
    unlock();

    /* Deliver the failure report to ourselves, under a fresh lock scope. */
    if (nodeFailRep->noOfNodes)
    {
      lock();
      raw_sendSignal(&nodeFail_signal, getOwnNodeId());
      flush_send_buffers();
      unlock();
    }
  }
}