/**
 * Wait for readable data on the sockets of all connected TCP transporters.
 *
 * Rebuilds tcpReadset from scratch, calls select() on it and stores the
 * select() result in tcpReadSelectReply.
 *
 * @param timeOutMillis  Maximum time, in milliseconds, to block in select().
 *                       Replaced by 0 when at least one transporter reports
 *                       hasReceiveData(), so the call does not block.
 * @return Non-zero when select() returned a non-zero value (note: this
 *         includes -1) or when a transporter already had receive data;
 *         0 otherwise.
 */
Uint32
TransporterRegistry::poll_TCP(Uint32 timeOutMillis)
{
  bool pendingData = false;

  // The constant `false` disables this early exit; select() is always reached.
  if (false && nTCPTransporters == 0)
  {
    tcpReadSelectReply = 0;
    return 0;
  }

  // Needed for TCP/IP connections: select() takes the highest socket value
  // plus one, so track the maximum while filling the read-set.
  NDB_SOCKET_TYPE highestSocket = -1;

  // The read-set is used by select
  FD_ZERO(&tcpReadset);

  // Prepare for receiving
  for (int idx = 0; idx < nTCPTransporters; idx++)
  {
    TCP_Transporter * const transporter = theTCPTransporters[idx];
    const NodeId remoteNode = transporter->getRemoteNodeId();

    // Only transporters that are connected go into the socket read-set
    if (is_connected(remoteNode) && transporter->isConnected())
    {
      const NDB_SOCKET_TYPE sock = transporter->getSocket();

      if (sock > highestSocket)
        highestSocket = sock;

      FD_SET(sock, &tcpReadset);
    }

    // Checked for every transporter, connected or not
    if (transporter->hasReceiveData())
      pendingData = true;
  }

  // Data is already waiting somewhere: do not block in select()
  if (pendingData)
    timeOutMillis = 0;

  struct timeval waitTime;
  waitTime.tv_sec  = timeOutMillis / 1000;
  waitTime.tv_usec = (timeOutMillis % 1000) * 1000;

  // The highest socket value plus one
  highestSocket++;

  tcpReadSelectReply = select(highestSocket, &tcpReadset, 0, 0, &waitTime);

  // Disabled by the constant `false`
  if (false && tcpReadSelectReply == -1 && errno == EINTR)
    g_eventLogger.info("woke-up by signal");

#ifdef NDB_WIN32
  if (tcpReadSelectReply == SOCKET_ERROR)
  {
    NdbSleep_MilliSleep(timeOutMillis);
  }
#endif

  return tcpReadSelectReply || pendingData;
}
void ErrorReporter::handleError(int messageID, const char* problemData, const char* objRef, NdbShutdownType nst) { WriteMessage(messageID, problemData, objRef, theEmulatedJamIndex, theEmulatedJam); g_eventLogger.info(problemData); g_eventLogger.info(objRef); childReportError(messageID); if(messageID == NDBD_EXIT_ERROR_INSERT){ NdbShutdown(NST_ErrorInsert); } else { if (nst == NST_ErrorHandler) nst = s_errorHandlerShutdownType; NdbShutdown(nst); } }
/* * MAIN */ int main(int argc, char** argv) { NDB_INIT(argv[0]); load_defaults("my",load_default_groups,&argc,&argv); int ho_error; #ifndef DBUG_OFF opt_debug= "d:t:O,/tmp/ndb_mgmd.trace"; #endif if ((ho_error=handle_options(&argc, &argv, my_long_options, ndb_std_get_one_option))) exit(ho_error); start: glob= new MgmGlobals; /** * OSE specific. Enable shared ownership of file system resources. * This is needed in order to use the cluster log since the events * from the cluster is written from the 'ndb_receive'(NDBAPI) thread/process. */ #if defined NDB_OSE || defined NDB_SOFTOSE efs_segment_share(); #endif global_mgmt_server_check = 1; if (opt_interactive || opt_non_interactive || g_print_full_config) { opt_daemon= 0; } if (opt_mycnf && opt_config_filename) { ndbout_c("Both --mycnf and -f is not supported"); return 0; } if (opt_mycnf == 0 && opt_config_filename == 0) { struct stat buf; if (stat("config.ini", &buf) != -1) opt_config_filename = "config.ini"; } glob->socketServer = new SocketServer(); MgmApiService * mapi = new MgmApiService(); glob->mgmObject = new MgmtSrvr(glob->socketServer, opt_config_filename, opt_connect_str); if (g_print_full_config) goto the_end; if (glob->mgmObject->init()) goto error_end; my_setwd(NdbConfig_get_path(0), MYF(0)); glob->localNodeId= glob->mgmObject->getOwnNodeId(); if (glob->localNodeId == 0) { goto error_end; } glob->port= glob->mgmObject->getPort(); if (glob->port == 0) goto error_end; glob->interface_name = 0; glob->use_specific_ip = false; if(!glob->use_specific_ip){ int count= 5; // no of retries for tryBind while(!glob->socketServer->tryBind(glob->port, glob->interface_name)){ if (--count > 0) { NdbSleep_MilliSleep(1000); continue; } ndbout_c("Unable to setup port: %s:%d!\n" "Please check if the port is already used,\n" "(perhaps a ndb_mgmd is already running),\n" "and if you are executing on the correct computer", (glob->interface_name ? 
glob->interface_name : "*"), glob->port); goto error_end; } free(glob->interface_name); glob->interface_name = 0; } if(!glob->socketServer->setup(mapi, &glob->port, glob->interface_name)) { ndbout_c("Unable to setup management port: %d!\n" "Please check if the port is already used,\n" "(perhaps a ndb_mgmd is already running),\n" "and if you are executing on the correct computer", glob->port); delete mapi; goto error_end; } if(!glob->mgmObject->check_start()){ ndbout_c("Unable to check start management server."); ndbout_c("Probably caused by illegal initial configuration file."); goto error_end; } if (opt_daemon) { // Become a daemon char *lockfile= NdbConfig_PidFileName(glob->localNodeId); char *logfile= NdbConfig_StdoutFileName(glob->localNodeId); NdbAutoPtr<char> tmp_aptr1(lockfile), tmp_aptr2(logfile); if (NdbDaemon_Make(lockfile, logfile, 0) == -1) { ndbout << "Cannot become daemon: " << NdbDaemon_ErrorText << endl; return 1; } } #ifndef NDB_WIN32 signal(SIGPIPE, SIG_IGN); #endif { BaseString error_string; if(!glob->mgmObject->start(error_string)){ ndbout_c("Unable to start management server."); ndbout_c("Probably caused by illegal initial configuration file."); ndbout_c(error_string.c_str()); goto error_end; } } //glob->mgmObject->saveConfig(); mapi->setMgm(glob->mgmObject); char msg[256]; BaseString::snprintf(msg, sizeof(msg), "NDB Cluster Management Server. %s", NDB_VERSION_STRING); ndbout_c(msg); g_eventLogger.info(msg); BaseString::snprintf(msg, 256, "Id: %d, Command port: %d", glob->localNodeId, glob->port); ndbout_c(msg); g_eventLogger.info(msg); g_StopServer = false; g_RestartServer= false; glob->socketServer->startServer(); #if ! defined NDB_OSE && ! 
defined NDB_SOFTOSE if(opt_interactive) { BaseString con_str; if(glob->interface_name) con_str.appfmt("host=%s:%d", glob->interface_name, glob->port); else con_str.appfmt("localhost:%d", glob->port); Ndb_mgmclient com(con_str.c_str(), 1); while(g_StopServer != true && read_and_execute(&com, "ndb_mgm> ", 1)); } else #endif { while(g_StopServer != true) NdbSleep_MilliSleep(500); } if(g_RestartServer) g_eventLogger.info("Restarting server..."); else g_eventLogger.info("Shutting down server..."); glob->socketServer->stopServer(); // We disconnect from the ConfigRetreiver mgmd when we delete glob below glob->socketServer->stopSessions(true); g_eventLogger.info("Shutdown complete"); the_end: delete glob; if(g_RestartServer) goto start; ndb_end(opt_endinfo ? MY_CHECK_ERROR | MY_GIVE_INFO : 0); return 0; error_end: delete glob; ndb_end(opt_endinfo ? MY_CHECK_ERROR | MY_GIVE_INFO : 0); return 1; }
// run as own thread void TransporterRegistry::start_clients_thread() { int persist_mgm_count= 0; DBUG_ENTER("TransporterRegistry::start_clients_thread"); while (m_run_start_clients_thread) { NdbSleep_MilliSleep(100); persist_mgm_count++; if(persist_mgm_count==50) { ndb_mgm_check_connection(m_mgm_handle); persist_mgm_count= 0; } for (int i= 0, n= 0; n < nTransporters && m_run_start_clients_thread; i++){ Transporter * t = theTransporters[i]; if (!t) continue; n++; const NodeId nodeId = t->getRemoteNodeId(); switch(performStates[nodeId]){ case CONNECTING: if(!t->isConnected() && !t->isServer) { bool connected= false; /** * First, we try to connect (if we have a port number). */ if (t->get_s_port()) connected= t->connect_client(); /** * If dynamic, get the port for connecting from the management server */ if( !connected && t->get_s_port() <= 0) { // Port is dynamic int server_port= 0; struct ndb_mgm_reply mgm_reply; if(!ndb_mgm_is_connected(m_mgm_handle)) ndb_mgm_connect(m_mgm_handle, 0, 0, 0); if(ndb_mgm_is_connected(m_mgm_handle)) { int res= ndb_mgm_get_connection_int_parameter(m_mgm_handle, t->getRemoteNodeId(), t->getLocalNodeId(), CFG_CONNECTION_SERVER_PORT, &server_port, &mgm_reply); DBUG_PRINT("info",("Got dynamic port %d for %d -> %d (ret: %d)", server_port,t->getRemoteNodeId(), t->getLocalNodeId(),res)); if( res >= 0 ) { /** * Server_port == 0 just means that that a mgmt server * has not received a new port yet. Keep the old. */ if (server_port) t->set_s_port(server_port); } else if(ndb_mgm_is_connected(m_mgm_handle)) { g_eventLogger.info("Failed to get dynamic port to connect to: %d", res); ndb_mgm_disconnect(m_mgm_handle); } else { g_eventLogger.info("Management server closed connection early. " "It is probably being shut down (or has problems). " "We will retry the connection. 
%d %s %s line: %d", ndb_mgm_get_latest_error(m_mgm_handle), ndb_mgm_get_latest_error_desc(m_mgm_handle), ndb_mgm_get_latest_error_msg(m_mgm_handle), ndb_mgm_get_latest_error_line(m_mgm_handle) ); } } /** else * We will not be able to get a new port unless * the m_mgm_handle is connected. Note that not * being connected is an ok state, just continue * until it is able to connect. Continue using the * old port until we can connect again and get a * new port. */ } } break; case DISCONNECTING: if(t->isConnected()) t->doDisconnect(); break; default: break; } } } DBUG_VOID_RETURN; }
/**
 * Handle the "get nodeid" request: allocate (or confirm) a node id for the
 * peer of this session and write a "get nodeid reply" to m_output.
 *
 * Reads the request fields from `args`. Fields the client omits keep the
 * defaults assigned below. On any failure a reply whose "result:" line
 * describes the problem is written and the function returns; on success the
 * reply carries "nodeid: <id>" and "result: Ok", and the id is reserved in
 * m_allocated_resources for `timeout` seconds.
 *
 * @param args  Parsed request properties.
 */
void
MgmApiSession::get_nodeid(Parser_t::Context &,
                          const class Properties &args)
{
  const char *cmd= "get nodeid reply";
  // Every value filled in by args.get() gets a defined default, so that a
  // request omitting a field never leads to a read of an indeterminate value
  // (notably `version`, which is passed to the ndbCompatible_* checks below).
  Uint32 version= 0, nodeid= 0, nodetype= 0xff;
  Uint32 timeout= 20;  // default seconds timeout
  const char * transporter= NULL;
  const char * user= NULL;
  const char * password= NULL;
  const char * public_key= NULL;
  const char * endian= NULL;
  const char * name= NULL;
  Uint32 log_event= 1;
  bool log_event_version;
  union { long l; char c[sizeof(long)]; } endian_check;

  args.get("version", &version);
  args.get("nodetype", &nodetype);
  args.get("transporter", &transporter);
  args.get("nodeid", &nodeid);
  args.get("user", &user);
  args.get("password", &password);
  args.get("public key", &public_key);
  args.get("endian", &endian);
  args.get("name", &name);
  args.get("timeout", &timeout);
  /* for backwards compatibility keep track if client uses new protocol */
  log_event_version= args.get("log_event", &log_event);

  // Determine our own byte order: with l == 1 the last byte of the long is
  // non-zero only on a big-endian host. Reject clients that state a
  // different endianness.
  endian_check.l = 1;
  if (endian
      && strcmp(endian, (endian_check.c[sizeof(long)-1]) ? "big" : "little") != 0) {
    m_output->println(cmd);
    m_output->println("result: Node does not have the same endianness as the management server.");
    m_output->println("");
    return;
  }

  // `compatible` is only consumed by the disabled (#if 0) check further
  // down; the switch still rejects unknown node types.
  bool compatible;
  switch (nodetype) {
  case NODE_TYPE_MGM:
  case NODE_TYPE_API:
    compatible = ndbCompatible_mgmt_api(NDB_VERSION, version);
    break;
  case NODE_TYPE_DB:
    compatible = ndbCompatible_mgmt_ndb(NDB_VERSION, version);
    break;
  default:
    m_output->println(cmd);
    m_output->println("result: unknown nodetype %d", nodetype);
    m_output->println("");
    return;
  }

  // The peer address is handed to alloc_node_id() below
  struct sockaddr_in addr;
  SOCKET_SIZE_TYPE addrlen= sizeof(addr);
  int r = getpeername(m_socket, (struct sockaddr*)&addr, &addrlen);
  if (r != 0) {
    m_output->println(cmd);
    m_output->println("result: getpeername(%d) failed, err= %d", m_socket, r);
    m_output->println("");
    return;
  }

  NodeId tmp= nodeid;
  // Allocate unless the requested id is already reserved by this session
  if (tmp == 0 || !m_allocated_resources->is_reserved(tmp)) {
    BaseString error_string;
    int error_code;
    NDB_TICKS tick= 0;
    /* only report error on second attempt as not to clog the cluster log */
    while (!m_mgmsrv.alloc_node_id(&tmp, (enum ndb_mgm_node_type)nodetype,
                                   (struct sockaddr*)&addr, &addrlen,
                                   error_code, error_string,
                                   tick == 0 ? 0 : log_event))
    {
      /* NDB_MGM_ALLOCID_CONFIG_MISMATCH is a non retriable error */
      if (tick == 0 && error_code != NDB_MGM_ALLOCID_CONFIG_MISMATCH)
      {
        // attempt to free any timed out reservations
        tick= NdbTick_CurrentMillisecond();
        struct PurgeStruct ps;
        m_mgmsrv.get_connected_nodes(ps.free_nodes);
        // invert connected_nodes to get free nodes
        ps.free_nodes.bitXORC(NodeBitmask());
        ps.str= 0;
        ps.tick= tick;
        m_mgmsrv.get_socket_server()->
          foreachSession(stop_session_if_timed_out, &ps);
        m_mgmsrv.get_socket_server()->checkSessions();
        error_string = "";
        // tick is now non-zero, so this retry happens at most once
        continue;
      }
      const char *alias;
      const char *str;
      alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)
                                                nodetype, &str);
      m_output->println(cmd);
      m_output->println("result: %s", error_string.c_str());
      /* only use error_code protocol if client knows about it */
      if (log_event_version)
        m_output->println("error_code: %d", error_code);
      m_output->println("");
      return;
    }
  }

#if 0
  if (!compatible){
    m_output->println(cmd);
    m_output->println("result: incompatible version mgmt 0x%x and node 0x%x",
                      NDB_VERSION, version);
    m_output->println("");
    return;
  }
#endif

  m_output->println(cmd);
  m_output->println("nodeid: %u", tmp);
  m_output->println("result: Ok");
  m_output->println("");
  // timeout is in seconds; reserve_node() is given milliseconds
  m_allocated_resources->reserve_node(tmp, timeout*1000);

  if (name)
    g_eventLogger.info("Node %d: %s", tmp, name);

  return;
}
void WatchDog::run() { unsigned int anIPValue, sleep_time; unsigned int oldIPValue = 0; unsigned int theIntervalCheck = theInterval; struct MicroSecondTimer start_time, last_time, now; NdbTick_getMicroTimer(&start_time); last_time = start_time; // WatchDog for the single threaded NDB while (!theStop) { sleep_time= 100; NdbSleep_MilliSleep(sleep_time); if(theStop) break; NdbTick_getMicroTimer(&now); if (NdbTick_getMicrosPassed(last_time, now)/1000 > sleep_time*2) { struct tms my_tms; times(&my_tms); g_eventLogger.info("Watchdog: User time: %llu System time: %llu", (Uint64)my_tms.tms_utime, (Uint64)my_tms.tms_stime); g_eventLogger.warning("Watchdog: Warning overslept %u ms, expected %u ms.", NdbTick_getMicrosPassed(last_time, now)/1000, sleep_time); } last_time = now; // Verify that the IP thread is not stuck in a loop anIPValue = *theIPValue; if (anIPValue != 0) { oldIPValue = anIPValue; globalData.incrementWatchDogCounter(0); NdbTick_getMicroTimer(&start_time); theIntervalCheck = theInterval; } else { int warn = 1; Uint32 elapsed = NdbTick_getMicrosPassed(start_time, now)/1000; /* oldIPValue == 9 indicates malloc going on, this can take some time so only warn if we pass the watchdog interval */ if (oldIPValue == 9) if (elapsed < theIntervalCheck) warn = 0; else theIntervalCheck += theInterval; if (warn) { const char *last_stuck_action = get_action(oldIPValue); g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action); { struct tms my_tms; times(&my_tms); g_eventLogger.info("Watchdog: User time: %llu System time: %llu", (Uint64)my_tms.tms_utime, (Uint64)my_tms.tms_stime); } if (elapsed > 3 * theInterval) { shutdownSystem(last_stuck_action); } } } } return; }