Example #1
0
/**
 * Wait for incoming data on the connected TCP transporters.
 *
 * Rebuilds tcpReadset from every transporter that is connected, then
 * calls select() on it and stores the result in tcpReadSelectReply.
 * If any transporter reports hasReceiveData() the select() is done with
 * a zero timeout.
 *
 * @param timeOutMillis  maximum time to block, in milliseconds
 * @return non-zero if select() returned a non-zero value or if some
 *         transporter reported receive data, otherwise 0
 */
Uint32 
TransporterRegistry::poll_TCP(Uint32 timeOutMillis)
{
  // Shortcut for "no TCP transporters"; switched off by the constant false.
  if (false && nTCPTransporters == 0)
  {
    tcpReadSelectReply = 0;
    return 0;
  }

  // The read set handed to select() is rebuilt on every call.
  FD_ZERO(&tcpReadset);

  NDB_SOCKET_TYPE highestSocket = -1;
  bool pendingData = false;

  for (int idx = 0; idx < nTCPTransporters; idx++) {
    TCP_Transporter * tcp = theTCPTransporters[idx];

    // Only connected transporters take part in the select().
    const NodeId remoteNode = tcp->getRemoteNodeId();
    if (is_connected(remoteNode) && tcp->isConnected()) {
      const NDB_SOCKET_TYPE fd = tcp->getSocket();

      // Track the largest socket value; select() needs it.
      if (highestSocket < fd)
        highestSocket = fd;

      FD_SET(fd, &tcpReadset);
    }

    // Asked of every transporter, connected or not.
    if (tcp->hasReceiveData())
      pendingData = true;
  }

  // Do not block when some transporter already reports receive data.
  if (pendingData)
    timeOutMillis = 0;

  struct timeval timeout;
  timeout.tv_sec  = timeOutMillis / 1000;
  timeout.tv_usec = (timeOutMillis % 1000) * 1000;

  // select() takes the highest socket value plus one.
  highestSocket++;

  tcpReadSelectReply = select(highestSocket, &tcpReadset, 0, 0, &timeout);

  // Logging of interrupted selects; switched off by the constant false.
  if (false && tcpReadSelectReply == -1 && errno == EINTR)
    g_eventLogger.info("woke-up by signal");

#ifdef NDB_WIN32
  // On a select() error, sleep for the timeout instead.
  if (tcpReadSelectReply == SOCKET_ERROR)
  {
    NdbSleep_MilliSleep(timeOutMillis);
  }
#endif

  return (tcpReadSelectReply != 0) || pendingData;
}
Example #2
0
/**
 * Record an error and shut down.
 *
 * The error is passed to WriteMessage() together with the emulated jam
 * data, the two text arguments are written to the event log,
 * childReportError() is called with the message id, and finally
 * NdbShutdown() is called.
 *
 * @param messageID    error identifier; NDBD_EXIT_ERROR_INSERT always
 *                     results in an NST_ErrorInsert shutdown
 * @param problemData  first text argument, passed to WriteMessage() and
 *                     logged
 * @param objRef       second text argument, passed to WriteMessage() and
 *                     logged
 * @param nst          requested shutdown type; NST_ErrorHandler is
 *                     replaced by s_errorHandlerShutdownType
 */
void
ErrorReporter::handleError(int messageID,
			   const char* problemData, 
			   const char* objRef,
			   NdbShutdownType nst)
{
  WriteMessage(messageID, problemData,
	       objRef, theEmulatedJamIndex, theEmulatedJam);

  // The logger takes a printf-style format. The texts are passed as
  // arguments to "%s" so that a '%' inside them is logged literally
  // instead of being interpreted as a conversion. Null texts are skipped.
  if (problemData)
    g_eventLogger.info("%s", problemData);
  if (objRef)
    g_eventLogger.info("%s", objRef);

  childReportError(messageID);

  if(messageID == NDBD_EXIT_ERROR_INSERT){
    NdbShutdown(NST_ErrorInsert);
  } else {
    // NST_ErrorHandler is a placeholder for the configured shutdown type.
    if (nst == NST_ErrorHandler)
      nst = s_errorHandlerShutdownType;
    NdbShutdown(nst);
  }
}
Example #3
0
/*
 *  MAIN 
 */
int main(int argc, char** argv)
{

  NDB_INIT(argv[0]);

  load_defaults("my",load_default_groups,&argc,&argv);

  int ho_error;
#ifndef DBUG_OFF
  opt_debug= "d:t:O,/tmp/ndb_mgmd.trace";
#endif
  if ((ho_error=handle_options(&argc, &argv, my_long_options, 
			       ndb_std_get_one_option)))
    exit(ho_error);

start:
  glob= new MgmGlobals;

  /**
   * OSE specific. Enable shared ownership of file system resources. 
   * This is needed in order to use the cluster log since the events 
   * from the cluster is written from the 'ndb_receive'(NDBAPI) thread/process.
   */
#if defined NDB_OSE || defined NDB_SOFTOSE
  efs_segment_share();
#endif

  global_mgmt_server_check = 1;

  if (opt_interactive ||
      opt_non_interactive ||
      g_print_full_config) {
    opt_daemon= 0;
  }

  if (opt_mycnf && opt_config_filename)
  {
    ndbout_c("Both --mycnf and -f is not supported");
    return 0;
  }

  if (opt_mycnf == 0 && opt_config_filename == 0)
  {
    struct stat buf;
    if (stat("config.ini", &buf) != -1)
      opt_config_filename = "config.ini";
  }
  
  glob->socketServer = new SocketServer();

  MgmApiService * mapi = new MgmApiService();

  glob->mgmObject = new MgmtSrvr(glob->socketServer,
				 opt_config_filename,
				 opt_connect_str);

  if (g_print_full_config)
    goto the_end;

  if (glob->mgmObject->init())
    goto error_end;

  my_setwd(NdbConfig_get_path(0), MYF(0));

  glob->localNodeId= glob->mgmObject->getOwnNodeId();
  if (glob->localNodeId == 0) {
    goto error_end;
  }

  glob->port= glob->mgmObject->getPort();

  if (glob->port == 0)
    goto error_end;

  glob->interface_name = 0;
  glob->use_specific_ip = false;

  if(!glob->use_specific_ip){
    int count= 5; // no of retries for tryBind
    while(!glob->socketServer->tryBind(glob->port, glob->interface_name)){
      if (--count > 0) {
	NdbSleep_MilliSleep(1000);
	continue;
      }
      ndbout_c("Unable to setup port: %s:%d!\n"
	       "Please check if the port is already used,\n"
	       "(perhaps a ndb_mgmd is already running),\n"
	       "and if you are executing on the correct computer", 
	       (glob->interface_name ? glob->interface_name : "*"), glob->port);
      goto error_end;
    }
    free(glob->interface_name);
    glob->interface_name = 0;
  }

  if(!glob->socketServer->setup(mapi, &glob->port, glob->interface_name))
  {
    ndbout_c("Unable to setup management port: %d!\n"
	     "Please check if the port is already used,\n"
	     "(perhaps a ndb_mgmd is already running),\n"
	     "and if you are executing on the correct computer", 
	     glob->port);
    delete mapi;
    goto error_end;
  }

  if(!glob->mgmObject->check_start()){
    ndbout_c("Unable to check start management server.");
    ndbout_c("Probably caused by illegal initial configuration file.");
    goto error_end;
  }

  if (opt_daemon) {
    // Become a daemon
    char *lockfile= NdbConfig_PidFileName(glob->localNodeId);
    char *logfile=  NdbConfig_StdoutFileName(glob->localNodeId);
    NdbAutoPtr<char> tmp_aptr1(lockfile), tmp_aptr2(logfile);

    if (NdbDaemon_Make(lockfile, logfile, 0) == -1) {
      ndbout << "Cannot become daemon: " << NdbDaemon_ErrorText << endl;
      return 1;
    }
  }

#ifndef NDB_WIN32
  signal(SIGPIPE, SIG_IGN);
#endif
  {
    BaseString error_string;
    if(!glob->mgmObject->start(error_string)){
      ndbout_c("Unable to start management server.");
      ndbout_c("Probably caused by illegal initial configuration file.");
      ndbout_c(error_string.c_str());
      goto error_end;
    }
  }

  //glob->mgmObject->saveConfig();
  mapi->setMgm(glob->mgmObject);

  char msg[256];
  BaseString::snprintf(msg, sizeof(msg),
	   "NDB Cluster Management Server. %s", NDB_VERSION_STRING);
  ndbout_c(msg);
  g_eventLogger.info(msg);

  BaseString::snprintf(msg, 256, "Id: %d, Command port: %d",
	   glob->localNodeId, glob->port);
  ndbout_c(msg);
  g_eventLogger.info(msg);
  
  g_StopServer = false;
  g_RestartServer= false;
  glob->socketServer->startServer();

#if ! defined NDB_OSE && ! defined NDB_SOFTOSE
  if(opt_interactive) {
    BaseString con_str;
    if(glob->interface_name)
      con_str.appfmt("host=%s:%d", glob->interface_name, glob->port);
    else 
      con_str.appfmt("localhost:%d", glob->port);
    Ndb_mgmclient com(con_str.c_str(), 1);
    while(g_StopServer != true && read_and_execute(&com, "ndb_mgm> ", 1));
  } else 
#endif
  {
    while(g_StopServer != true)
      NdbSleep_MilliSleep(500);
  }

  if(g_RestartServer)
    g_eventLogger.info("Restarting server...");
  else
    g_eventLogger.info("Shutting down server...");
  glob->socketServer->stopServer();
  // We disconnect from the ConfigRetreiver mgmd when we delete glob below
  glob->socketServer->stopSessions(true);
  g_eventLogger.info("Shutdown complete");
 the_end:
  delete glob;
  if(g_RestartServer)
    goto start;
  ndb_end(opt_endinfo ? MY_CHECK_ERROR | MY_GIVE_INFO : 0);
  return 0;
 error_end:
  delete glob;
  ndb_end(opt_endinfo ? MY_CHECK_ERROR | MY_GIVE_INFO : 0);
  return 1;
}
Example #4
0
// run as own thread
void
TransporterRegistry::start_clients_thread()
{
  int persist_mgm_count= 0;
  DBUG_ENTER("TransporterRegistry::start_clients_thread");
  while (m_run_start_clients_thread) {
    NdbSleep_MilliSleep(100);
    persist_mgm_count++;
    if(persist_mgm_count==50)
    {
      ndb_mgm_check_connection(m_mgm_handle);
      persist_mgm_count= 0;
    }
    for (int i= 0, n= 0; n < nTransporters && m_run_start_clients_thread; i++){
      Transporter * t = theTransporters[i];
      if (!t)
	continue;
      n++;

      const NodeId nodeId = t->getRemoteNodeId();
      switch(performStates[nodeId]){
      case CONNECTING:
	if(!t->isConnected() && !t->isServer) {
	  bool connected= false;
	  /**
	   * First, we try to connect (if we have a port number).
	   */
	  if (t->get_s_port())
	    connected= t->connect_client();

	  /**
	   * If dynamic, get the port for connecting from the management server
	   */
	  if( !connected && t->get_s_port() <= 0) {	// Port is dynamic
	    int server_port= 0;
	    struct ndb_mgm_reply mgm_reply;

	    if(!ndb_mgm_is_connected(m_mgm_handle))
	      ndb_mgm_connect(m_mgm_handle, 0, 0, 0);
	    
	    if(ndb_mgm_is_connected(m_mgm_handle))
	    {
	      int res=
		ndb_mgm_get_connection_int_parameter(m_mgm_handle,
						     t->getRemoteNodeId(),
						     t->getLocalNodeId(),
						     CFG_CONNECTION_SERVER_PORT,
						     &server_port,
						     &mgm_reply);
	      DBUG_PRINT("info",("Got dynamic port %d for %d -> %d (ret: %d)",
				 server_port,t->getRemoteNodeId(),
				 t->getLocalNodeId(),res));
	      if( res >= 0 )
	      {
		/**
		 * Server_port == 0 just means that that a mgmt server
		 * has not received a new port yet. Keep the old.
		 */
		if (server_port)
		  t->set_s_port(server_port);
	      }
	      else if(ndb_mgm_is_connected(m_mgm_handle))
	      {
		g_eventLogger.info("Failed to get dynamic port to connect to: %d", res);
		ndb_mgm_disconnect(m_mgm_handle);
	      }
	      else
	      {
		g_eventLogger.info("Management server closed connection early. "
			 "It is probably being shut down (or has problems). "
			 "We will retry the connection. %d %s %s line: %d",
                                   ndb_mgm_get_latest_error(m_mgm_handle),
                                   ndb_mgm_get_latest_error_desc(m_mgm_handle),
                                   ndb_mgm_get_latest_error_msg(m_mgm_handle),
                                   ndb_mgm_get_latest_error_line(m_mgm_handle)
                                   );
	      }
	    }
	    /** else
	     * We will not be able to get a new port unless
	     * the m_mgm_handle is connected. Note that not
	     * being connected is an ok state, just continue
	     * until it is able to connect. Continue using the
	     * old port until we can connect again and get a
	     * new port.
	     */
	  }
	}
	break;
      case DISCONNECTING:
	if(t->isConnected())
	  t->doDisconnect();
	break;
      default:
	break;
      }
    }
  }
  DBUG_VOID_RETURN;
}
Example #5
0
/**
 * Handle a "get nodeid" request and write a "get nodeid reply".
 *
 * Reads the request arguments, rejects clients whose reported endianness
 * differs from this process and clients with an unknown node type, looks
 * up the peer address of the session socket and then allocates a node id
 * through m_mgmsrv.alloc_node_id() unless the requested id is already
 * reserved by this session.  On success the id is reserved for the
 * requested timeout (given in seconds).
 *
 * Every reply consists of the command line, one or more "key: value"
 * lines and an empty line.
 *
 * @param args  the parsed request arguments
 */
void
MgmApiSession::get_nodeid(Parser_t::Context &,
			  const class Properties &args)
{
  const char *cmd= "get nodeid reply";
  // Every argument gets a default here: the lookups below may not find
  // the key (see log_event_version), and 'version' is read afterwards
  // regardless of whether the client sent it.
  Uint32 version= 0, nodeid= 0, nodetype= 0xff;
  Uint32 timeout= 20;  // default seconds timeout
  const char * transporter= NULL;
  const char * user= NULL;
  const char * password= NULL;
  const char * public_key= NULL;
  const char * endian= NULL;
  const char * name= NULL;
  Uint32 log_event= 1;
  bool log_event_version;
  union { long l; char c[sizeof(long)]; } endian_check;

  args.get("version", &version);
  args.get("nodetype", &nodetype);
  args.get("transporter", &transporter);
  args.get("nodeid", &nodeid);
  args.get("user", &user);
  args.get("password", &password);
  args.get("public key", &public_key);
  args.get("endian", &endian);
  args.get("name", &name);
  args.get("timeout", &timeout);
  /* for backwards compatibility keep track if client uses new protocol */
  log_event_version= args.get("log_event", &log_event);

  // With l == 1 the last byte of the long is non-zero only when the most
  // significant byte is stored first, i.e. on a big-endian host.
  endian_check.l = 1;
  if(endian 
     && strcmp(endian,(endian_check.c[sizeof(long)-1])?"big":"little")!=0) {
    m_output->println(cmd);
    m_output->println("result: Node does not have the same endianness as the management server.");
    m_output->println("");
    return;
  }

  // 'compatible' is only consumed by the disabled (#if 0) check further
  // down; the switch still rejects unknown node types.
  bool compatible;
  switch (nodetype) {
  case NODE_TYPE_MGM:
  case NODE_TYPE_API:
    compatible = ndbCompatible_mgmt_api(NDB_VERSION, version);
    break;
  case NODE_TYPE_DB:
    compatible = ndbCompatible_mgmt_ndb(NDB_VERSION, version);
    break;
  default:
    m_output->println(cmd);
    m_output->println("result: unknown nodetype %d", nodetype);
    m_output->println("");
    return;
  }

  struct sockaddr_in addr;
  SOCKET_SIZE_TYPE addrlen= sizeof(addr);
  int r = getpeername(m_socket, (struct sockaddr*)&addr, &addrlen);
  if (r != 0 ) {
    m_output->println(cmd);
    m_output->println("result: getpeername(%d) failed, err= %d", m_socket, r);
    m_output->println("");
    return;
  }

  NodeId tmp= nodeid;
  if(tmp == 0 || !m_allocated_resources->is_reserved(tmp)){
    BaseString error_string;
    int error_code;
    // tick == 0 marks the first allocation attempt.
    NDB_TICKS tick= 0;
    /* only report error on second attempt as not to clog the cluster log */
    while (!m_mgmsrv.alloc_node_id(&tmp, (enum ndb_mgm_node_type)nodetype, 
                                   (struct sockaddr*)&addr, &addrlen,
                                   error_code, error_string,
                                   tick == 0 ? 0 : log_event))
    {
      /* NDB_MGM_ALLOCID_CONFIG_MISMATCH is a non retriable error */
      if (tick == 0 && error_code != NDB_MGM_ALLOCID_CONFIG_MISMATCH)
      {
        // attempt to free any timed out reservations
        tick= NdbTick_CurrentMillisecond();
        struct PurgeStruct ps;
        m_mgmsrv.get_connected_nodes(ps.free_nodes);
        // invert connected_nodes to get free nodes
        ps.free_nodes.bitXORC(NodeBitmask());
        ps.str= 0;
        ps.tick= tick;
        m_mgmsrv.get_socket_server()->
          foreachSession(stop_session_if_timed_out,&ps);
	m_mgmsrv.get_socket_server()->checkSessions();
        error_string = "";
        continue;
      }
      // The alias lookup result is not used in the reply below.
      const char *alias;
      const char *str;
      alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)
						nodetype, &str);
      m_output->println(cmd);
      m_output->println("result: %s", error_string.c_str());
      /* only use error_code protocol if client knows about it */
      if (log_event_version)
        m_output->println("error_code: %d", error_code);
      m_output->println("");
      return;
    }
  }    
  
#if 0
  if (!compatible){
    m_output->println(cmd);
    m_output->println("result: incompatible version mgmt 0x%x and node 0x%x",
		      NDB_VERSION, version);
    m_output->println("");
    return;
  }
#endif
  
  m_output->println(cmd);
  m_output->println("nodeid: %u", tmp);
  m_output->println("result: Ok");
  m_output->println("");
  // The client gives the timeout in seconds; reserve_node gets it * 1000.
  m_allocated_resources->reserve_node(tmp, timeout*1000);
  
  if (name)
    g_eventLogger.info("Node %d: %s", tmp, name);

  return;
}
Example #6
0
/**
 * Main loop of the watchdog.
 *
 * Until theStop is set, the loop sleeps 100 ms, warns if the sleep took
 * more than twice as long as requested, and then reads *theIPValue:
 *  - non-zero: the value is remembered, the watchdog counter is
 *    incremented and the "stuck" timer is restarted;
 *  - zero: a warning naming the last remembered action is logged (with a
 *    grace period while that action is 9, see below), and
 *    shutdownSystem() is called once more than three intervals have
 *    passed since the timer was last restarted.
 */
void 
WatchDog::run()
{
  unsigned int anIPValue, sleep_time;
  unsigned int oldIPValue = 0;
  unsigned int theIntervalCheck = theInterval;
  struct MicroSecondTimer start_time, last_time, now;
  NdbTick_getMicroTimer(&start_time);
  last_time = start_time;

  // WatchDog for the single threaded NDB
  while (!theStop)
  {
    sleep_time= 100;

    NdbSleep_MilliSleep(sleep_time);
    if(theStop)
      break;

    NdbTick_getMicroTimer(&now);
    if (NdbTick_getMicrosPassed(last_time, now)/1000 > sleep_time*2)
    {
      struct tms my_tms;
      times(&my_tms);
      g_eventLogger.info("Watchdog: User time: %llu  System time: %llu",
                         (Uint64)my_tms.tms_utime,
                         (Uint64)my_tms.tms_stime);
      // Narrow explicitly so the argument has the 32-bit width that the
      // %u conversion expects, whatever width the timer function returns.
      g_eventLogger.warning("Watchdog: Warning overslept %u ms, expected %u ms.",
                            (Uint32)(NdbTick_getMicrosPassed(last_time, now)/1000),
                            sleep_time);
    }
    last_time = now;

    // Verify that the IP thread is not stuck in a loop
    anIPValue = *theIPValue;
    if (anIPValue != 0)
    {
      // Progress was reported: remember it and restart the stuck timer.
      oldIPValue = anIPValue;
      globalData.incrementWatchDogCounter(0);
      NdbTick_getMicroTimer(&start_time);
      theIntervalCheck = theInterval;
    }
    else
    {
      int warn = 1;
      Uint32 elapsed = NdbTick_getMicrosPassed(start_time, now)/1000;
      /*
        oldIPValue == 9 indicates malloc going on, this can take some time
        so only warn if we pass the watchdog interval
      */
      if (oldIPValue == 9)
      {
        if (elapsed < theIntervalCheck)
        {
          warn = 0;
        }
        else
        {
          // Warn now, and again only after another full interval.
          theIntervalCheck += theInterval;
        }
      }

      if (warn)
      {
        const char *last_stuck_action = get_action(oldIPValue);
        g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action);
        {
          struct tms my_tms;
          times(&my_tms);
          g_eventLogger.info("Watchdog: User time: %llu  System time: %llu",
                             (Uint64)my_tms.tms_utime,
                             (Uint64)my_tms.tms_stime);
        }
        if (elapsed > 3 * theInterval)
        {
          shutdownSystem(last_stuck_action);
        }
      }
    }
  }
  return;
}