/****** uti/prog/sge_get_alias_path() ***************************************** * NAME * sge_get_alias_path() -- Return the path of the 'alias_file' * * SYNOPSIS * const char* sge_get_alias_path(void) * * FUNCTION * Return the path of the 'alias_file' * * NOTES * MT-NOTE: sge_get_alias_path() is MT safe * ******************************************************************************/ const char *sge_get_alias_path(void) { const char *sge_root, *sge_cell; char *cp; int len; SGE_STRUCT_STAT sbuf; DENTER_(TOP_LAYER, "sge_get_alias_path"); sge_root = sge_get_root_dir(1, NULL, 0, 1); sge_cell = sge_get_default_cell(); if (SGE_STAT(sge_root, &sbuf)) { CRITICAL((SGE_EVENT, MSG_SGETEXT_SGEROOTNOTFOUND_S , sge_root)); SGE_EXIT(NULL, 1); } len = strlen(sge_root) + strlen(sge_cell) + strlen(COMMON_DIR) + strlen(ALIAS_FILE) + 5; if (!(cp = malloc(len))) { CRITICAL((SGE_EVENT, MSG_MEMORY_MALLOCFAILEDFORPATHTOHOSTALIASFILE )); SGE_EXIT(NULL, 1); } sprintf(cp, "%s/%s/%s/%s", sge_root, sge_cell, COMMON_DIR, ALIAS_FILE); DRETURN_(cp); }
/****** sgeobj/var/var_list_set_sharedlib_path() ******************************
*  NAME
*     var_list_set_sharedlib_path -- set shared lib path
*
*  SYNOPSIS
*     void var_list_set_sharedlib_path(lList **varl);
*
*  FUNCTION
*     Sets or replaces the shared lib path in the list of variables.
*     The SGE shared lib path ("<sge_root>/lib/<arch>") is always set to
*     the beginning of the resulting shared lib path
*     (security, see var_get_sharedlib_path_name()).
*
*     - If the variable exists with a non-empty value, the SGE path and a
*       ':' are prepended to that value.
*     - If the variable exists but is empty, it is overwritten.
*     - If the variable does not exist, it is added to the list.
*
*     Nothing is changed if 'varl' is NULL, if the SGE root directory or
*     the architecture string cannot be determined, or if memory cannot
*     be allocated.
*
*  INPUTS
*     lList **varl - list of environment variables
*
*  SEE ALSO
*     sgeobj/var/var_get_sharedlib_path_name()
*     sgeobj/var/var_list_set_string()
*     sgeobj/var/var_list_set_int()
*     sgeobj/var/var_list_set_sge_u32()
******************************************************************************/
void var_list_set_sharedlib_path(lList **varl)
{
   char *sharedlib_path;
   char *sge_sharedlib_path;
   const char *sge_root = sge_get_root_dir(0, NULL, 0, 1);
   const char *arch;
   const char *sharedlib_path_name = var_get_sharedlib_path_name();
   lListElem *sharedlib_elem = NULL;
   size_t sge_path_size;

   DENTER(TOP_LAYER, "set_sharedlib_path");

   /*
    * sge_get_root_dir() is called with do_exit == 0 here, so it may hand
    * back NULL; strlen()/snprintf() on NULL would be undefined behavior.
    */
   arch = sge_get_arch();
   if (varl == NULL || sge_root == NULL || arch == NULL) {
      DPRINTF(("cannot set sharedlib path: missing variable list, SGE root or architecture\n"));
      DEXIT;
      return;
   }

   /* this is the SGE sharedlib path */
   sge_path_size = strlen(sge_root) + strlen("/lib/") + strlen(arch) + 1;
   sge_sharedlib_path = sge_malloc(sge_path_size);
   if (sge_sharedlib_path == NULL) {
      DEXIT;
      return;
   }
   snprintf(sge_sharedlib_path, sge_path_size, "%s/lib/%s", sge_root, arch);

   /* if already in environment: extend by SGE sharedlib path, else set */
   sharedlib_elem = lGetElemStr(*varl, VA_variable, sharedlib_path_name);
   if (sharedlib_elem != NULL) {
      const char *old_value = lGetString(sharedlib_elem, VA_value);

      if (old_value != NULL && old_value[0] != '\0') {
         /* "<sge path>" + ':' + "<old value>" + '\0' */
         size_t combined_size = strlen(sge_sharedlib_path) + 1 +
                                strlen(old_value) + 1;

         DPRINTF(("sharedlib path %s already set:\n", sharedlib_path_name));

         sharedlib_path = sge_malloc(combined_size);
         if (sharedlib_path != NULL) {
            /* SGE path goes first so that it takes precedence */
            snprintf(sharedlib_path, combined_size, "%s:%s",
                     sge_sharedlib_path, old_value);
            lSetString(sharedlib_elem, VA_value, sharedlib_path);
            sge_free(&sharedlib_path);
         }
      } else {
         DPRINTF(("overwriting empty sharedlib path %s\n", sharedlib_path_name));
         lSetString(sharedlib_elem, VA_value, sge_sharedlib_path);
      }
   } else {
      DPRINTF(("creating new sharedlib path %s\n", sharedlib_path_name));
      sharedlib_elem = lAddElemStr(varl, VA_variable, sharedlib_path_name, VA_Type);
      lSetString(sharedlib_elem, VA_value, sge_sharedlib_path);
   }

   sge_free(&sge_sharedlib_path);
   DEXIT;
}
/****** qmaster/sge_qmaster_main/main() **************************************** * NAME * main() -- qmaster entry point * * SYNOPSIS * int main(int argc, char* argv[]) * * FUNCTION * Qmaster entry point. * * NOTE: The main thread must block all signals before any additional thread * is created. Failure to do so will ruin signal handling! * * INPUTS * int argc - number of commandline arguments * char* argv[] - commandline arguments * * RESULT * 0 - success * * NOTES * We check whether 'SGE_ROOT' is set before we daemonize. Once qmaster is * a daemon, we are no longer connected to a terminal and hence can not * output an error message to stdout or stderr. * * We need to inovke 'prepare_enroll()' *before* the user id is switched via * 'become_admin_user()'. This is because qmaster must be able to bind a so * called reserved port (requires root privileges) if configured to do so. * *******************************************************************************/ int main(int argc, char* argv[]) { int max_enroll_tries; int ret_val; int file_descriptor_settings_result = 0; bool has_daemonized = false; sge_gdi_ctx_class_t *ctx = NULL; u_long32 start_time = sge_get_gmt(); monitoring_t monitor; DENTER_MAIN(TOP_LAYER, "qmaster"); sge_monitor_init(&monitor, "MAIN", NONE_EXT, MT_WARNING, MT_ERROR); prof_mt_init(); sge_get_root_dir(true, NULL, 0, true); #ifdef __SGE_COMPILE_WITH_GETTEXT__ sge_init_language_func((gettext_func_type)gettext, (setlocale_func_type)setlocale, (bindtextdomain_func_type)bindtextdomain, (textdomain_func_type)textdomain); sge_init_language(NULL,NULL); #endif /* * qmaster doesn't support any commandline anymore, * but we should show version string and -help option */ if (argc != 1) { sigset_t sig_set; sigfillset(&sig_set); pthread_sigmask(SIG_SETMASK, &sig_set, NULL); sge_qmaster_thread_init(&ctx, QMASTER, MAIN_THREAD, true); sge_process_qmaster_cmdline(argv); SGE_EXIT((void**)&ctx, 1); } /* * daemonize qmaster * set file descriptor limits * and initialize 
libraries to be used in multi threaded environment * also take care that finished child processed of this process become * zombie jobs */ has_daemonized = sge_daemonize_qmaster(); file_descriptor_settings_result = set_file_descriptor_limit(); #if !defined(INTERIX) && !defined(CYGWIN) init_sig_action_and_mask(); #endif /* init qmaster threads without becomming admin user */ sge_qmaster_thread_init(&ctx, QMASTER, MAIN_THREAD, false); ctx->set_daemonized(ctx, has_daemonized); /* this must be done as root user to be able to bind ports < 1024 */ max_enroll_tries = 30; while (cl_com_get_handle(prognames[QMASTER],1) == NULL) { ctx->prepare_enroll(ctx); max_enroll_tries--; if (max_enroll_tries <= 0) { /* exit after 30 seconds */ CRITICAL((SGE_EVENT, MSG_QMASTER_COMMUNICATION_ERRORS )); SGE_EXIT((void**)&ctx, 1); } if (cl_com_get_handle(prognames[QMASTER],1) == NULL) { /* sleep when prepare_enroll() failed */ sleep(1); } } /* * now the commlib up and running. Set qmaster application status function * (commlib callback function for qping status information response * messages (SIRM)) */ ret_val = cl_com_set_status_func(sge_qmaster_application_status); if (ret_val != CL_RETVAL_OK) { ERROR((SGE_EVENT, cl_get_error_text(ret_val))); } /* * now we become admin user change into the correct root directory set the * the target for logging messages */ sge_become_admin_user(ctx->get_admin_user(ctx)); sge_chdir_exit(ctx->get_qmaster_spool_dir(ctx), 1); log_state_set_log_file(ERR_FILE); ctx->set_exit_func(ctx, sge_exit_func); #if defined(SOLARIS) /* Init shared SMF libs if necessary */ if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) { SGE_EXIT((void**)&ctx, 1); } #endif /* * We do increment the heartbeat manually here. This is the 'startup heartbeat'. * The first time the hearbeat will be incremented through the heartbeat event * handler is after about HEARTBEAT_INTERVAL seconds. The hardbeat event handler * is setup during the initialisazion of the timer thread. 
*/ inc_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, HEARTBEAT_INTERVAL, NULL); /* * Event master module has to be initialized already here because * sge_setup_qmaster() might already access it although event delivery * thread is not running. * * Corresponding shutdown is done in sge_event_master_terminate(); * * EB: In my opinion the init function should called in * sge_event_master_initialize(). Is it possible to move that call? */ sge_event_master_init(); sge_setup_qmaster(ctx, argv); #ifndef USE_POLL if (file_descriptor_settings_result == 1) { WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_LARGER_THAN_LIMIT_U, sge_u32c(FD_SETSIZE))); WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE1_U, sge_u32c(FD_SETSIZE - 20))); WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE2)); WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE3)); } #endif /* * Setup all threads and initialize corresponding modules. * Order is important! */ sge_signaler_initialize(ctx); sge_event_master_initialize(ctx); sge_timer_initialize(ctx, &monitor); sge_worker_initialize(ctx); #if 0 sge_test_initialize(ctx); #endif sge_listener_initialize(ctx); sge_scheduler_initialize(ctx, NULL); #ifndef NO_JNI sge_jvm_initialize(ctx, NULL); #endif INFO((SGE_EVENT, "qmaster startup took "sge_u32" seconds", sge_get_gmt() - start_time)); /* * Block till signal from signal thread arrives us */ sge_thread_wait_for_signal(); /* * Shutdown all threads and shutdown corresponding modules. * Order is important! */ #ifndef NO_JNI sge_jvm_terminate(ctx, NULL); #endif sge_scheduler_terminate(ctx, NULL); sge_listener_terminate(); #if 0 sge_test_terminate(ctx); #endif sge_worker_terminate(ctx); sge_timer_terminate(); sge_event_master_terminate(); sge_signaler_terminate(); /* * Remaining shutdown operations */ sge_clean_lists(); sge_monitor_free(&monitor); sge_shutdown((void**)&ctx, sge_qmaster_get_exit_state()); sge_prof_cleanup(); DEXIT; return 0; } /* main() */
/****** shepherd/qrsh/qlogin_starter() **************************************** * * NAME * qlogin_starter -- short description * * SYNOPSIS * #include "qlogin_starter.h" * int qlogin_starter(const char *cwd, char *daemon); * * FUNCTION * The function is called from shepherd to start a protocol daemon * like telnetd, rshd or rlogind. * The mechanism used to call these daemons is that of inetd: * - a socket is created (server side, any free port is assigned * by the operating system) * - qlogin_starter waits for someone to connect to this socket * - the socket file handles are redirected to stdin, stdout * and stderr * - the daemon process is started * Additionally to the inetd mechanism, the port number and some * other information is sent to the qrsh process that initiated * (over qmaster, schedd, execd, shepherd) the qlogin_starter call. * * INPUTS * cwd - the current working directory (the active_jobs directory) * daemon - name and path of the daemon to start * * RESULT * on success, the function will not return (it exec's) * 4, if there is a problem with permissions * 5, if a socket cannot be allocated * 6, if a socket bind fails * 7, if socket name (port) cannot be determined * 8, if environment (to be passed to qrsh) cannot be read * 9, if sending information to qrsh fails * 10, if nobody connects to the socket within a one minute * 11, if the acception of a connecting client fails * 12, if the execution of the daemon fails ******************************************************************************/ int qlogin_starter(const char *cwd, char *daemon, char** env) { int ret; int port; int fd; int maxfd; int sockfd; int on = 1; int sso = 1; int newsfd; fd_set fds; struct sockaddr_in serv_addr; struct timeval timeout; char buffer[2048]; char *args[20]; /* JG: TODO: should be dynamically allocated */ int argc = 0; const char *sge_root = NULL; const char *arch = NULL; #if defined(IRIX65) || defined(INTERIX) || defined(DARWIN6) || defined(ALPHA5) || defined(HP1164) 
int length; int len; #else socklen_t length; socklen_t len; #endif len = sizeof(serv_addr); /* must be root because we must access /dev/something */ if( setgid(SGE_SUPERUSER_GID) || setuid(SGE_SUPERUSER_UID) || setegid(SGE_SUPERUSER_GID) || seteuid(SGE_SUPERUSER_UID)) { shepherd_trace("cannot change uid/gid\n"); return 4; } shepherd_trace("uid = "uid_t_fmt", euid = "uid_t_fmt", gid = "gid_t_fmt ", egid = "gid_t_fmt, getuid(), geteuid(), getgid(), getegid()); /* socket stuff from here */ sockfd = socket(AF_INET, SOCK_STREAM, 0); if (sockfd == -1) { shepherd_trace("cannot open socket."); return 5; } shepherd_trace("using sfd %d", sockfd); setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on)); /* bind an address to any socket */ memset((char *) &serv_addr, 0, sizeof(serv_addr)); serv_addr.sin_port = 0; serv_addr.sin_family = AF_INET; serv_addr.sin_addr.s_addr = INADDR_ANY; ret = bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)); if (ret != 0) { shepherd_trace("cannot bind socket: %s", strerror(errno)); shutdown(sockfd, 2); close(sockfd); return 6; } /* find out assigned port number and pass it to caller */ length = sizeof(serv_addr); if (getsockname(sockfd,(struct sockaddr *) &serv_addr, &length) == -1) { shepherd_trace("getting socket name failed: %s", strerror(errno)); shutdown(sockfd, 2); close(sockfd); return 7; } /* listen on socked - make connections be accepted */ if (listen(sockfd, 1) != 0) { shepherd_trace("listen failed: %s", strerror(errno)); shutdown(sockfd, 2); close(sockfd); return 8; } /* send necessary info to qrsh: port + utilbin directory + active job * directory */ port = ntohs(serv_addr.sin_port); shepherd_trace("bound to port %d", port); sge_root = sge_get_root_dir(0, NULL, 0, 1); arch = sge_get_arch(); if (sge_root == NULL || arch == NULL) { shepherd_trace("reading environment SGE_ROOT and ARC failed"); shutdown(sockfd, 2); close(sockfd); return 9; } snprintf(buffer, 2048, "0:%d:%s/utilbin/%s:%s:%s", port, sge_root, 
arch, cwd, get_conf_val("host")); if (write_to_qrsh(buffer) != 0) { shepherd_trace("communication with qrsh failed"); shutdown(sockfd, 2); close(sockfd); return 10; } /* wait for connection */ shepherd_trace("waiting for connection."); /* use a reasonable timeout (60 seconds) to prevent hanging here forever */ FD_ZERO(&fds); FD_SET(sockfd, &fds); timeout.tv_sec = 60; timeout.tv_usec = 0; if (select(sockfd+1, &fds, NULL, NULL, &timeout) < 1) { shepherd_trace("nobody connected to the socket"); shutdown(sockfd, 2); close(sockfd); return 11; } /* accept connection */ newsfd = accept(sockfd, (struct sockaddr *)(&serv_addr), &len); if (newsfd == -1) { shepherd_trace("error when accepting socket conection"); shutdown(sockfd, 2); close(sockfd); return 12; } shepherd_trace("accepted connection on fd %d", newsfd); /* now we have a connection and do no longer need the "well known" port * free this resource. */ shutdown(sockfd, 2); close(sockfd); /* don't close on exec */ fcntl( newsfd, F_SETFD, 0 ); /* speed up ;-) */ setsockopt(newsfd, IPPROTO_TCP, TCP_NODELAY, (const char *) &sso, sizeof(int)); /* use this fd as stdin,out,err */ dup2( newsfd, 0 ); dup2( newsfd, 1 ); dup2( newsfd, 2 ); /* close all the rest */ #ifndef WIN32NATIVE maxfd = sysconf(_SC_OPEN_MAX); #else /* WIN32NATIVE */ maxfd = FD_SETSIZE; /* detect maximal number of fds under NT/W2000 (env: Files)*/ #endif /* WIN32NATIVE */ /* we do not use any FD_SET call it is ok to use _SC_OPEN_MAX */ for (fd=3; fd<maxfd; fd++) { close(fd); } shepherd_trace("daemon to start: |%s|", daemon); /* split daemon commandline into single arguments */ /* JG: TODO: might contain quoted arguments containing spaces * make function to split or use an already existing one */ args[argc++] = strtok(daemon, " "); while ((args[argc++] = strtok(NULL, " ")) != NULL); #if 0 { int i = 0; shepherd_trace("daemon commandline split to %d arguments", argc); while (args[i] != NULL) { shepherd_trace("daemon argv[%d] = |%s|", i, args[i]); i++; } } 
#endif /* that it. */ execve(args[0], args, env); /* oh oh, exec failed */ /* no way to tell anyone, becuase all FDs are closed */ /* last chance -> tell parent process */ shutdown(newsfd, 2); close(newsfd); return 13; }