int NdbRestarter::connect(){ disconnect(); handle = ndb_mgm_create_handle(); if (handle == NULL){ g_err << "handle == NULL" << endl; return -1; } g_info << "Connecting to mgmsrv at " << addr.c_str() << endl; if (ndb_mgm_set_connectstring(handle,addr.c_str())) { MGMERR(handle); g_err << "Connection to " << addr.c_str() << " failed" << endl; return -1; } if (ndb_mgm_connect(handle, 0, 0, 0) == -1) { MGMERR(handle); g_err << "Connection to " << addr.c_str() << " failed" << endl; return -1; } connected = true; return 0; }
int NdbRestarter::restartAll(bool initial, bool nostart, bool abort){ if (!isConnected()) return -1; if (ndb_mgm_restart2(handle, 0, NULL, initial, 1, abort) == -1) { MGMERR(handle); g_err << "Could not restart(stop) all nodes " << endl; // return -1; Continue anyway - Magnus } if (waitClusterNoStart(60) != 0){ g_err << "Cluster didnt enter STATUS_NOT_STARTED within 60s" << endl; return -1; } if(nostart){ g_debug << "restartAll: nostart == true" << endl; return 0; } if (ndb_mgm_start(handle, 0, NULL) == -1) { MGMERR(handle); g_err << "Could not restart(start) all nodes " << endl; return -1; } return 0; }
int NdbRestarter::restartNodes(int * nodes, int cnt, Uint32 flags) { if (!isConnected()) return -1; int ret = 0; int unused; if ((ret = ndb_mgm_restart4(handle, cnt, nodes, (flags & NRRF_INITIAL), (flags & NRRF_NOSTART), (flags & NRRF_ABORT), (flags & NRRF_FORCE), &unused)) <= 0) { /** * ndb_mgm_restart4 returned error, one reason could * be that the node have not stopped fast enough! * Check status of the node to see if it's on the * way down. If that's the case ignore the error */ if (getStatus() != 0) return -1; g_info << "ndb_mgm_restart4 returned with error, checking node state" << endl; for (int j = 0; j<cnt; j++) { int _nodeId = nodes[j]; for(unsigned i = 0; i < ndbNodes.size(); i++) { if(ndbNodes[i].node_id == _nodeId) { g_info <<_nodeId<<": status="<<ndbNodes[i].node_status<<endl; /* Node found check state */ switch(ndbNodes[i].node_status){ case NDB_MGM_NODE_STATUS_RESTARTING: case NDB_MGM_NODE_STATUS_SHUTTING_DOWN: break; default: MGMERR(handle); g_err << "Could not stop node with id = "<< _nodeId << endl; return -1; } } } } } if ((flags & NRRF_NOSTART) == 0) { wait_until_ready(nodes, cnt); } return 0; }
/**
 * Request start of the listed nodes.
 *
 * @param nodes      array of node ids to start
 * @param num_nodes  number of entries in `nodes`
 * @return 0 when ndb_mgm_start reports exactly `num_nodes`, -1 when not
 *         connected or when it reports any other value.
 */
int NdbRestarter::startNodes(const int * nodes, int num_nodes)
{
  if (!isConnected())
    return -1;

  const int started = ndb_mgm_start(handle, num_nodes, nodes);
  if (started == num_nodes)
    return 0;

  MGMERR(handle);
  g_err << "Could not start all nodes " << endl;
  return -1;
}
/**
 * Request start of all nodes (ndb_mgm_start with an empty node list).
 *
 * @return 0 on success, -1 when not connected or when ndb_mgm_start
 *         returns -1.
 */
int NdbRestarter::startAll()
{
  if (!isConnected())
    return -1;

  const int rc = ndb_mgm_start(handle, 0, NULL);
  if (rc != -1)
    return 0;

  MGMERR(handle);
  g_err << "Could not start all nodes " << endl;
  return -1;
}
int NdbRestarter::insertErrorInNode(int _nodeId, int _error){ if (!isConnected()) return -1; ndb_mgm_reply reply; reply.return_code = 0; if (ndb_mgm_insert_error(handle, _nodeId, _error, &reply) == -1){ MGMERR(handle); g_err << "Could not insert error in node with id = "<< _nodeId << endl; } if(reply.return_code != 0){ g_err << "Error: " << reply.message << endl; } return 0; }
int NdbRestarter::exitSingleUserMode(){ if (!isConnected()) return -1; ndb_mgm_reply reply; reply.return_code = 0; if (ndb_mgm_exit_single_user(handle, &reply) == -1){ MGMERR(handle); g_err << "Could not exit single user mode " << endl; } if(reply.return_code != 0){ g_err << "Error: " << reply.message << endl; } return reply.return_code; }
int NdbRestarter::dumpStateOneNode(int _nodeId, const int * _args, int _num_args){ if (!isConnected()) return -1; ndb_mgm_reply reply; reply.return_code = 0; if (ndb_mgm_dump_state(handle, _nodeId, _args, _num_args, &reply) == -1){ MGMERR(handle); g_err << "Could not dump state in node with id = "<< _nodeId << endl; } if(reply.return_code != 0){ g_err << "Error: " << reply.message << endl; } return reply.return_code; }
int NdbRestarter::restartOneDbNode(int _nodeId, bool inital, bool nostart, bool abort){ if (!isConnected()) return -1; int ret = 0; if ((ret = ndb_mgm_restart2(handle, 1, &_nodeId, inital, nostart, abort)) <= 0) { /** * ndb_mgm_restart2 returned error, one reason could * be that the node have not stopped fast enough! * Check status of the node to see if it's on the * way down. If that's the case ignore the error */ if (getStatus() != 0) return -1; g_info << "ndb_mgm_restart2 returned with error, checking node state" << endl; for(size_t i = 0; i < ndbNodes.size(); i++){ if(ndbNodes[i].node_id == _nodeId){ g_info <<_nodeId<<": status="<<ndbNodes[i].node_status<<endl; /* Node found check state */ switch(ndbNodes[i].node_status){ case NDB_MGM_NODE_STATUS_RESTARTING: case NDB_MGM_NODE_STATUS_SHUTTING_DOWN: return 0; default: break; } } } MGMERR(handle); g_err << "Could not stop node with id = "<< _nodeId << endl; return -1; } return 0; }
int NdbRestarter::enterSingleUserMode(int _nodeId){ if (!isConnected()) return -1; ndb_mgm_reply reply; reply.return_code = 0; if (ndb_mgm_enter_single_user(handle, _nodeId, &reply) == -1){ MGMERR(handle); g_err << "Could not enter single user mode api node = "<< _nodeId << endl; } if(reply.return_code != 0){ g_err << "Error: " << reply.message << endl; } return reply.return_code; }
int NdbRestarter::getStatus(){ int retries = 0; struct ndb_mgm_cluster_state * status; struct ndb_mgm_node_state * node; ndbNodes.clear(); mgmNodes.clear(); apiNodes.clear(); if (!isConnected()) return -1; while(retries < 10){ status = ndb_mgm_get_status(handle); if (status == NULL){ ndbout << "status==NULL, retries="<<retries<<endl; MGMERR(handle); retries++; continue; } for (int i = 0; i < status->no_of_nodes; i++){ node = &status->node_states[i]; switch(node->node_type){ case NDB_MGM_NODE_TYPE_NDB: ndbNodes.push_back(*node); break; case NDB_MGM_NODE_TYPE_MGM: mgmNodes.push_back(*node); break; case NDB_MGM_NODE_TYPE_API: apiNodes.push_back(*node); break; default: if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN || node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){ retries++; ndbNodes.clear(); mgmNodes.clear(); apiNodes.clear(); free(status); status = NULL; i = status->no_of_nodes; ndbout << "kalle"<< endl; break; } abort(); break; } } if(status == 0){ ndbout << "status == 0" << endl; continue; } free(status); return 0; } g_err << "getStatus failed" << endl; return -1; }
static int waitClusterStatus(const char* _addr, ndb_mgm_node_status _status) { int _startphase = -1; /* Ignore SIGPIPE */ signal(SIGPIPE, SIG_IGN); handle = ndb_mgm_create_handle(); if (handle == NULL){ g_err << "Could not create ndb_mgm handle" << endl; return -1; } g_info << "Connecting to mgmsrv at " << _addr << endl; if (ndb_mgm_set_connectstring(handle, _addr)) { MGMERR(handle); g_err << "Connectstring " << _addr << " invalid" << endl; return -1; } if (ndb_mgm_connect(handle,0,0,1)) { MGMERR(handle); g_err << "Connection to " << _addr << " failed" << endl; return -1; } int attempts = 0; int resetAttempts = 0; const int MAX_RESET_ATTEMPTS = 10; bool allInState = false; int timeout_ms= _timeout * 10; /* In number of 100 milliseconds */ while (allInState == false){ if (_timeout > 0 && attempts > _timeout){ /** * Timeout has expired waiting for the nodes to enter * the state we want */ bool waitMore = false; /** * Make special check if we are waiting for * cluster to become started */ if(_status == NDB_MGM_NODE_STATUS_STARTED){ waitMore = true; /** * First check if any node is not starting * then it's no idea to wait anymore */ for (size_t n = 0; n < ndbNodes.size(); n++){ if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED && ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING) waitMore = false; } } if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){ g_err << "waitNodeState(" << ndb_mgm_get_node_status_string(_status) <<", "<<_startphase<<")" << " timeout after " << attempts <<" attemps" << endl; return -1; } g_err << "waitNodeState(" << ndb_mgm_get_node_status_string(_status) <<", "<<_startphase<<")" << " resetting number of attempts " << resetAttempts << endl; attempts = 0; resetAttempts++; } if (getStatus() != 0){ return -1; } /* Assume all nodes are in state(if there is any) */ allInState = (ndbNodes.size() > 0); /* Loop through all nodes and check their state */ for (size_t n = 0; n < ndbNodes.size(); n++) { ndb_mgm_node_state* ndbNode = 
&ndbNodes[n]; assert(ndbNode != NULL); g_info << "Node " << ndbNode->node_id << ": " << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl; if (ndbNode->node_status != _status) allInState = false; } if (!allInState) { g_info << "Waiting for cluster enter state " << ndb_mgm_get_node_status_string(_status)<< endl; NdbSleep_MilliSleep(100); } attempts++; } return 0; }
int getStatus(){ int retries = 0; struct ndb_mgm_cluster_state * status; struct ndb_mgm_node_state * node; ndbNodes.clear(); while(retries < 10){ status = ndb_mgm_get_status(handle); if (status == NULL){ ndbout << "status==NULL, retries="<<retries<<endl; MGMERR(handle); retries++; ndb_mgm_disconnect(handle); if (ndb_mgm_connect(handle,0,0,1)) { MGMERR(handle); g_err << "Reconnect failed" << endl; break; } continue; } int count = status->no_of_nodes; for (int i = 0; i < count; i++){ node = &status->node_states[i]; switch(node->node_type){ case NDB_MGM_NODE_TYPE_NDB: ndbNodes.push_back(*node); break; case NDB_MGM_NODE_TYPE_MGM: /* Don't care about MGM nodes */ break; case NDB_MGM_NODE_TYPE_API: /* Don't care about API nodes */ break; default: if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN || node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){ retries++; ndbNodes.clear(); free(status); status = NULL; count = 0; ndbout << "kalle"<< endl; break; } abort(); break; } } if(status == 0){ ndbout << "status == 0" << endl; continue; } free(status); return 0; } return -1; }
int NdbRestarter::getStatus(){ int retries = 0; struct ndb_mgm_cluster_state * status; struct ndb_mgm_node_state * node; ndbNodes.clear(); mgmNodes.clear(); apiNodes.clear(); if (!isConnected()) return -1; while(retries < 10){ status = ndb_mgm_get_status(handle); if (status == NULL){ if (m_reconnect){ if (connect() == 0){ g_err << "Reconnected..." << endl; continue; } const int err = ndb_mgm_get_latest_error(handle); if (err == NDB_MGM_COULD_NOT_CONNECT_TO_SOCKET){ g_err << "Could not connect to socket, sleep and retry" << endl; retries= 0; NdbSleep_SecSleep(1); continue; } } const int err = ndb_mgm_get_latest_error(handle); ndbout << "status==NULL, retries="<<retries<< " err=" << err << endl; MGMERR(handle); retries++; continue; } for (int i = 0; i < status->no_of_nodes; i++){ node = &status->node_states[i]; switch(node->node_type){ case NDB_MGM_NODE_TYPE_NDB: ndbNodes.push_back(*node); break; case NDB_MGM_NODE_TYPE_MGM: mgmNodes.push_back(*node); break; case NDB_MGM_NODE_TYPE_API: apiNodes.push_back(*node); break; default: if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN || node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){ retries++; ndbNodes.clear(); mgmNodes.clear(); apiNodes.clear(); free(status); status = NULL; i = status->no_of_nodes; ndbout << "kalle"<< endl; break; } abort(); break; } } if(status == 0){ ndbout << "status == 0" << endl; continue; } free(status); return 0; } g_err << "getStatus failed" << endl; return -1; }