Пример #1
0
/**
 * (Re)connect this restarter to the management server named by `addr`.
 *
 * disconnect() is always called first, then a new management handle is
 * created, given the connect string and connected.
 *
 * @return 0 on success (the `connected` flag is set to true),
 *         -1 if the handle could not be created, the connect string
 *         was rejected or the connect attempt failed.
 */
int
NdbRestarter::connect()
{
  disconnect();

  handle = ndb_mgm_create_handle();
  if (handle == NULL)
  {
    g_err << "handle == NULL" << endl;
    return -1;
  }

  g_info << "Connecting to mgmsrv at " << addr.c_str() << endl;

  /*
   * Both steps report failure the same way, so they share one error
   * path. The short-circuit keeps the connect attempt from running
   * when the connect string was not accepted.
   */
  if (ndb_mgm_set_connectstring(handle, addr.c_str()) ||
      ndb_mgm_connect(handle, 0, 0, 0) == -1)
  {
    MGMERR(handle);
    g_err  << "Connection to " << addr.c_str() << " failed" << endl;
    return -1;
  }

  connected = true;
  return 0;
}
Пример #2
0
/**
 * Restart every node: stop them all, wait for the whole cluster to
 * reach the "not started" state and then (optionally) start them again.
 *
 * @param initial  forwarded to ndb_mgm_restart2 as the "initial" flag
 * @param nostart  when true, return once the cluster is in "not started"
 *                 without issuing the start request
 * @param abort    forwarded to ndb_mgm_restart2 as the "abort" flag
 * @return 0 on success, -1 if not connected, if the cluster did not
 *         reach "not started" within 60s or if the start request failed.
 */
int NdbRestarter::restartAll(bool initial,
                             bool nostart,
                             bool abort)
{
  if (!isConnected())
    return -1;

  /*
   * The stop request always asks for "nostart" (the literal 1); the
   * caller's nostart flag only decides whether we start again below.
   */
  const int stopResult = ndb_mgm_restart2(handle, 0, NULL, initial, 1, abort);
  if (stopResult == -1)
  {
    MGMERR(handle);
    g_err  << "Could not restart(stop) all nodes " << endl;
    /* Deliberately not fatal: carry on and let the wait below decide. */
  }

  if (waitClusterNoStart(60) != 0)
  {
    g_err << "Cluster didnt enter STATUS_NOT_STARTED within 60s" << endl;
    return -1;
  }

  if (nostart)
  {
    g_debug << "restartAll: nostart == true" << endl;
    return 0;
  }

  const int startResult = ndb_mgm_start(handle, 0, NULL);
  if (startResult != -1)
    return 0;

  MGMERR(handle);
  g_err  << "Could not restart(start) all nodes " << endl;
  return -1;
}
/**
 * Restart the given set of nodes.
 *
 * @param nodes  array of node ids to restart
 * @param cnt    number of entries in `nodes`
 * @param flags  bitmask of NRRF_INITIAL / NRRF_NOSTART / NRRF_ABORT /
 *               NRRF_FORCE, each forwarded to ndb_mgm_restart4
 * @return 0 on success (or when every listed node that was found is
 *         already restarting / shutting down), -1 otherwise.
 *
 * Unless NRRF_NOSTART is set, wait_until_ready() is called for the
 * nodes before returning; its result is not checked.
 */
int
NdbRestarter::restartNodes(int * nodes, int cnt,
                           Uint32 flags)
{
  if (!isConnected())
    return -1;

  int unused;
  const int restarted = ndb_mgm_restart4(handle, cnt, nodes,
                                         (flags & NRRF_INITIAL),
                                         (flags & NRRF_NOSTART),
                                         (flags & NRRF_ABORT),
                                         (flags & NRRF_FORCE),
                                         &unused);
  if (restarted <= 0)
  {
    /*
     * The restart request reported an error. One possible reason is
     * that the nodes did not stop fast enough, so look at the current
     * node states: a node that is already on its way down is fine.
     */
    if (getStatus() != 0)
      return -1;

    g_info << "ndb_mgm_restart4 returned with error, checking node state"
           << endl;

    for (int n = 0; n < cnt; n++)
    {
      const int wantedId = nodes[n];
      for (unsigned k = 0; k < ndbNodes.size(); k++)
      {
        if (ndbNodes[k].node_id != wantedId)
          continue;

        g_info << wantedId << ": status=" << ndbNodes[k].node_status << endl;

        const bool onTheWayDown =
          ndbNodes[k].node_status == NDB_MGM_NODE_STATUS_RESTARTING ||
          ndbNodes[k].node_status == NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
        if (!onTheWayDown)
        {
          MGMERR(handle);
          g_err  << "Could not stop node with id = " << wantedId << endl;
          return -1;
        }
      }
    }
  }

  const bool startRequested = ((flags & NRRF_NOSTART) == 0);
  if (startRequested)
    wait_until_ready(nodes, cnt);

  return 0;
}
Пример #4
0
/**
 * Ask the management server to start the listed nodes.
 *
 * @param nodes      array of node ids to start
 * @param num_nodes  number of entries in `nodes`
 * @return 0 when ndb_mgm_start reports exactly `num_nodes`,
 *         -1 when not connected or on any other result.
 */
int NdbRestarter::startNodes(const int * nodes, int num_nodes)
{
  if (!isConnected())
    return -1;

  const int started = ndb_mgm_start(handle, num_nodes, nodes);
  if (started == num_nodes)
    return 0;

  MGMERR(handle);
  g_err  << "Could not start all nodes " << endl;
  return -1;
}
Пример #5
0
/**
 * Ask the management server to start all nodes (empty node list).
 *
 * @return 0 on success, -1 when not connected or when ndb_mgm_start
 *         returns -1.
 */
int NdbRestarter::startAll()
{
  if (!isConnected())
    return -1;

  const int started = ndb_mgm_start(handle, 0, NULL);
  if (started != -1)
    return 0;

  MGMERR(handle);
  g_err  << "Could not start all nodes " << endl;
  return -1;
}
Пример #6
0
int NdbRestarter::insertErrorInNode(int _nodeId, int _error){
  if (!isConnected())
    return -1;

  ndb_mgm_reply reply;
  reply.return_code = 0;

  if (ndb_mgm_insert_error(handle, _nodeId, _error, &reply) == -1){
    MGMERR(handle);
    g_err << "Could not insert error in node with id = "<< _nodeId << endl;
  }
  if(reply.return_code != 0){
    g_err << "Error: " << reply.message << endl;
  }
  return 0;
}
Пример #7
0
/**
 * Ask the management server to leave single user mode.
 *
 * @return -1 when not connected, otherwise the reply's return_code.
 *
 * NOTE(review): the -1 result of ndb_mgm_exit_single_user is only
 * logged; if the call fails without updating reply.return_code this
 * still returns 0 - confirm whether callers expect that.
 */
int NdbRestarter::exitSingleUserMode()
{
  if (!isConnected())
    return -1;

  /* Pre-set the code so an untouched reply is treated as "no error". */
  ndb_mgm_reply reply;
  reply.return_code = 0;

  const int rc = ndb_mgm_exit_single_user(handle, &reply);
  if (rc == -1)
  {
    MGMERR(handle);
    g_err << "Could not exit single user mode " << endl;
  }

  if (reply.return_code == 0)
    return 0;

  g_err << "Error: " << reply.message << endl;
  return reply.return_code;
}
Пример #8
0
/**
 * Send a dump-state request to a single node.
 *
 * @param _nodeId    id of the node to send the request to
 * @param _args      array of dump arguments
 * @param _num_args  number of entries in `_args`
 * @return -1 when not connected, otherwise the reply's return_code.
 *
 * NOTE(review): the -1 result of ndb_mgm_dump_state is only logged; if
 * the call fails without updating reply.return_code this still
 * returns 0 - confirm whether callers expect that.
 */
int NdbRestarter::dumpStateOneNode(int _nodeId, const int * _args, int _num_args)
{
  if (!isConnected())
    return -1;

  /* Pre-set the code so an untouched reply is treated as "no error". */
  ndb_mgm_reply reply;
  reply.return_code = 0;

  const int rc = ndb_mgm_dump_state(handle, _nodeId, _args, _num_args, &reply);
  if (rc == -1)
  {
    MGMERR(handle);
    g_err << "Could not dump state in node with id = " << _nodeId << endl;
  }

  if (reply.return_code == 0)
    return 0;

  g_err << "Error: " << reply.message << endl;
  return reply.return_code;
}
Пример #9
0
/**
 * Restart a single data node.
 *
 * @param _nodeId  id of the node to restart
 * @param inital   forwarded to ndb_mgm_restart2 as the "initial" flag
 * @param nostart  forwarded to ndb_mgm_restart2 as the "nostart" flag
 * @param abort    forwarded to ndb_mgm_restart2 as the "abort" flag
 * @return 0 if the restart was accepted, or if it was reported as
 *         failed but the node is already restarting / shutting down;
 *         -1 otherwise.
 */
int
NdbRestarter::restartOneDbNode(int _nodeId,
                               bool inital,
                               bool nostart,
                               bool abort)
{
  if (!isConnected())
    return -1;

  const int stopped = ndb_mgm_restart2(handle, 1, &_nodeId,
                                       inital, nostart, abort);
  if (stopped > 0)
    return 0;

  /*
   * The restart request reported an error. One possible reason is that
   * the node did not stop fast enough, so check whether it is already
   * on its way down; if so the error is ignored.
   */
  if (getStatus() != 0)
    return -1;

  g_info << "ndb_mgm_restart2 returned with error, checking node state" << endl;

  for (size_t idx = 0; idx < ndbNodes.size(); idx++)
  {
    if (ndbNodes[idx].node_id != _nodeId)
      continue;

    g_info << _nodeId << ": status=" << ndbNodes[idx].node_status << endl;

    if (ndbNodes[idx].node_status == NDB_MGM_NODE_STATUS_RESTARTING ||
        ndbNodes[idx].node_status == NDB_MGM_NODE_STATUS_SHUTTING_DOWN)
      return 0;
  }

  MGMERR(handle);
  g_err  << "Could not stop node with id = " << _nodeId << endl;
  return -1;
}
Пример #10
0
/**
 * Ask the management server to enter single user mode for the API
 * node `_nodeId`.
 *
 * @return -1 when not connected, otherwise the reply's return_code.
 *
 * NOTE(review): the -1 result of ndb_mgm_enter_single_user is only
 * logged; if the call fails without updating reply.return_code this
 * still returns 0 - confirm whether callers expect that.
 */
int NdbRestarter::enterSingleUserMode(int _nodeId)
{
  if (!isConnected())
    return -1;

  /* Pre-set the code so an untouched reply is treated as "no error". */
  ndb_mgm_reply reply;
  reply.return_code = 0;

  const int rc = ndb_mgm_enter_single_user(handle, _nodeId, &reply);
  if (rc == -1)
  {
    MGMERR(handle);
    g_err << "Could not enter single user mode api node = " << _nodeId << endl;
  }

  if (reply.return_code == 0)
    return 0;

  g_err << "Error: " << reply.message << endl;
  return reply.return_code;
}
Пример #11
0
int 
NdbRestarter::getStatus(){
  int retries = 0;
  struct ndb_mgm_cluster_state * status;
  struct ndb_mgm_node_state * node;
  
  ndbNodes.clear();
  mgmNodes.clear();
  apiNodes.clear();

  if (!isConnected())
    return -1;
  
  while(retries < 10){
    status = ndb_mgm_get_status(handle);
    if (status == NULL){
      ndbout << "status==NULL, retries="<<retries<<endl;
      MGMERR(handle);
      retries++;
      continue;
    }
    for (int i = 0; i < status->no_of_nodes; i++){
      node = &status->node_states[i];      
      switch(node->node_type){
      case NDB_MGM_NODE_TYPE_NDB:
	ndbNodes.push_back(*node);
	break;
      case NDB_MGM_NODE_TYPE_MGM:
	mgmNodes.push_back(*node);
	break;
      case NDB_MGM_NODE_TYPE_API:
	apiNodes.push_back(*node);
	break;
      default:
	if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
	   node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
	  retries++;
	  ndbNodes.clear();
	  mgmNodes.clear();
	  apiNodes.clear();
	  free(status); 
	  status = NULL;
	  i = status->no_of_nodes;

	  ndbout << "kalle"<< endl;
	  break;
	}
	abort();
	break;
      }
    }
    if(status == 0){
      ndbout << "status == 0" << endl;
      continue;
    }
    free(status);
    return 0;
  }
   
  g_err  << "getStatus failed" << endl;
  return -1;
}
Пример #12
0
/**
 * Connect to the management server at `_addr` and poll, every 100 ms,
 * until every data node in ndbNodes reports `_status`.
 *
 * `handle`, `_timeout` and `ndbNodes` are not declared in this block;
 * they are defined elsewhere in the file.
 * NOTE(review): `_timeout` is presumably in seconds (it is multiplied
 * by 10 to get a count of 100 ms polling ticks) - confirm.
 *
 * When the timeout expires while waiting for NDB_MGM_NODE_STATUS_STARTED
 * and every node is either STARTED or STARTING, the attempt counter is
 * reset and waiting continues, until more than MAX_RESET_ATTEMPTS
 * resets have been used. A `_timeout` that is not greater than zero
 * disables the timeout check.
 *
 * @param _addr    connect string of the management server
 * @param _status  node status every data node must reach
 * @return 0 once all data nodes are in `_status`, -1 on connection
 *         failure, status retrieval failure or timeout.
 */
static int
waitClusterStatus(const char* _addr,
                  ndb_mgm_node_status _status)
{
  int _startphase = -1;

  /* Ignore SIGPIPE */
  signal(SIGPIPE, SIG_IGN);

  handle = ndb_mgm_create_handle();
  if (handle == NULL){
    g_err << "Could not create ndb_mgm handle" << endl;
    return -1;
  }
  g_info << "Connecting to mgmsrv at " << _addr << endl;
  if (ndb_mgm_set_connectstring(handle, _addr))
  {
    MGMERR(handle);
    g_err  << "Connectstring " << _addr << " invalid" << endl;
    return -1;
  }
  if (ndb_mgm_connect(handle,0,0,1)) {
    MGMERR(handle);
    g_err  << "Connection to " << _addr << " failed" << endl;
    return -1;
  }

  int attempts = 0;
  int resetAttempts = 0;
  const int MAX_RESET_ATTEMPTS = 10;
  bool allInState = false;
  /*
   * Each attempt sleeps 100 ms, so the limit on `attempts` has to be
   * expressed in 100 ms ticks. Comparing against the raw `_timeout`
   * (as the code used to do) made the wait expire ten times too early.
   */
  const int timeout_ticks = _timeout * 10;
  while (allInState == false){
    if (_timeout > 0 && attempts > timeout_ticks){
      /**
       * Timeout has expired waiting for the nodes to enter
       * the state we want
       */
      bool waitMore = false;
      /**
       * Make special check if we are waiting for
       * cluster to become started
       */
      if(_status == NDB_MGM_NODE_STATUS_STARTED){
        waitMore = true;
        /**
         * If any node is neither started nor starting there is
         * no point in waiting any longer
         */
        for (size_t n = 0; n < ndbNodes.size(); n++){
          if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
              ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
            waitMore = false;
        }
      }

      if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
        g_err << "waitNodeState("
              << ndb_mgm_get_node_status_string(_status)
              <<", "<<_startphase<<")"
              << " timeout after " << attempts <<" attemps" << endl;
        return -1;
      }

      g_err << "waitNodeState("
            << ndb_mgm_get_node_status_string(_status)
            <<", "<<_startphase<<")"
            << " resetting number of attempts "
            << resetAttempts << endl;
      attempts = 0;
      resetAttempts++;
    }

    if (getStatus() != 0){
      return -1;
    }

    /* Assume all nodes are in state(if there is any) */
    allInState = (ndbNodes.size() > 0);

    /* Loop through all nodes and check their state */
    for (size_t n = 0; n < ndbNodes.size(); n++) {
      ndb_mgm_node_state* ndbNode = &ndbNodes[n];

      assert(ndbNode != NULL);

      g_info << "Node " << ndbNode->node_id << ": "
             << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;

      if (ndbNode->node_status !=  _status)
        allInState = false;
    }

    if (!allInState) {
      g_info << "Waiting for cluster enter state "
             << ndb_mgm_get_node_status_string(_status)<< endl;
      NdbSleep_MilliSleep(100);
    }

    attempts++;
  }
  return 0;
}
Пример #13
0
int 
getStatus(){
  int retries = 0;
  struct ndb_mgm_cluster_state * status;
  struct ndb_mgm_node_state * node;
  
  ndbNodes.clear();

  while(retries < 10){
    status = ndb_mgm_get_status(handle);
    if (status == NULL){
      ndbout << "status==NULL, retries="<<retries<<endl;
      MGMERR(handle);
      retries++;
      ndb_mgm_disconnect(handle);
      if (ndb_mgm_connect(handle,0,0,1)) {
        MGMERR(handle);
        g_err  << "Reconnect failed" << endl;
        break;
      }
      continue;
    }
    int count = status->no_of_nodes;
    for (int i = 0; i < count; i++){
      node = &status->node_states[i];      
      switch(node->node_type){
      case NDB_MGM_NODE_TYPE_NDB:
	ndbNodes.push_back(*node);
	break;
      case NDB_MGM_NODE_TYPE_MGM:
        /* Don't care about MGM nodes */
	break;
      case NDB_MGM_NODE_TYPE_API:
        /* Don't care about API nodes */
	break;
      default:
	if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
	   node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
	  retries++;
	  ndbNodes.clear();
	  free(status); 
	  status = NULL;
          count = 0;

	  ndbout << "kalle"<< endl;
	  break;
	}
	abort();
	break;
      }
    }
    if(status == 0){
      ndbout << "status == 0" << endl;
      continue;
    }
    free(status);
    return 0;
  }

  return -1;
}
Пример #14
0
int 
NdbRestarter::getStatus(){
  int retries = 0;
  struct ndb_mgm_cluster_state * status;
  struct ndb_mgm_node_state * node;
  
  ndbNodes.clear();
  mgmNodes.clear();
  apiNodes.clear();

  if (!isConnected())
    return -1;
  
  while(retries < 10){
    status = ndb_mgm_get_status(handle);
    if (status == NULL){
      if (m_reconnect){
        if (connect() == 0){
          g_err << "Reconnected..." << endl;
          continue;
        }
        const int err = ndb_mgm_get_latest_error(handle);
        if (err == NDB_MGM_COULD_NOT_CONNECT_TO_SOCKET){
          g_err << "Could not connect to socket, sleep and retry" << endl;
          retries= 0;
          NdbSleep_SecSleep(1);
          continue;
        }
      }
      const int err = ndb_mgm_get_latest_error(handle);
      ndbout << "status==NULL, retries="<<retries<< " err=" << err << endl;
      MGMERR(handle);
      retries++;
      continue;
    }
    for (int i = 0; i < status->no_of_nodes; i++){
      node = &status->node_states[i];      
      switch(node->node_type){
      case NDB_MGM_NODE_TYPE_NDB:
	ndbNodes.push_back(*node);
	break;
      case NDB_MGM_NODE_TYPE_MGM:
	mgmNodes.push_back(*node);
	break;
      case NDB_MGM_NODE_TYPE_API:
	apiNodes.push_back(*node);
	break;
      default:
	if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
	   node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
	  retries++;
	  ndbNodes.clear();
	  mgmNodes.clear();
	  apiNodes.clear();
	  free(status); 
	  status = NULL;
	  i = status->no_of_nodes;

	  ndbout << "kalle"<< endl;
	  break;
	}
	abort();
	break;
      }
    }
    if(status == 0){
      ndbout << "status == 0" << endl;
      continue;
    }
    free(status);
    return 0;
  }
   
  g_err  << "getStatus failed" << endl;
  return -1;
}