Пример #1
0
/*
 * Rewrite the SMPD_DYNAMIC_HOSTS_KEY entry so that it no longer contains
 * the local host name.
 *
 * When HAVE_WINDOWS_H is defined the function only traces entry/exit and
 * returns SMPD_SUCCESS.  Otherwise it reads the current host list while
 * holding the smpd data lock, drops every token equal to the local host
 * name, and either stores the shortened list or deletes the key when no
 * host is left.
 *
 * Returns SMPD_FAIL if the local host name cannot be obtained or the key
 * cannot be read, SMPD_SUCCESS otherwise.
 */
int smpd_remove_from_dynamic_hosts(void)
{
#ifndef HAVE_WINDOWS_H
    char local_name[SMPD_MAX_HOST_LENGTH];
    char current_list[8192];
    char filtered_list[8192];
    char *token;
#endif

    smpd_enter_fn(FCNAME);
#ifndef HAVE_WINDOWS_H
    if (smpd_get_hostname(local_name, SMPD_MAX_HOST_LENGTH) != 0)
    {
        smpd_err_printf("smpd_get_hostname failed, errno = %d\n", errno);
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }

    smpd_lock_smpd_data();

    current_list[0] = '\0';
    if (smpd_get_smpd_data(SMPD_DYNAMIC_HOSTS_KEY, current_list, 8192) != SMPD_SUCCESS)
    {
        smpd_unlock_smpd_data();
        smpd_dbg_printf("not removing host because "SMPD_DYNAMIC_HOSTS_KEY" does not exist\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }

    /* Copy every host except this one into the filtered list, separated by
     * single spaces.  The result can never be longer than the input. */
    filtered_list[0] = '\0';
    for (token = strtok(current_list, " \t\r\n");
         token != NULL;
         token = strtok(NULL, " \t\r\n"))
    {
        if (strcmp(token, local_name) == 0)
        {
            continue;
        }
        if (filtered_list[0] != '\0')
        {
            strcat(filtered_list, " ");
        }
        strcat(filtered_list, token);
    }

    if (filtered_list[0] != '\0')
    {
        smpd_dbg_printf("setting new "SMPD_DYNAMIC_HOSTS_KEY": %s\n", filtered_list);
        smpd_set_smpd_data(SMPD_DYNAMIC_HOSTS_KEY, filtered_list);
    }
    else
    {
        /* no hosts remain, so drop the key altogether */
        smpd_dbg_printf("removing "SMPD_DYNAMIC_HOSTS_KEY"\n");
        smpd_delete_smpd_data(SMPD_DYNAMIC_HOSTS_KEY);
    }
    smpd_unlock_smpd_data();
#endif
    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #2
0
/*
 * Fetch the per-user setting stored under `key` into `value` (capacity
 * `value_len`).
 *
 * Windows: the value is read from SMPD_REGISTRY_KEY under
 * HKEY_CURRENT_USER.  If the key cannot be opened or the value cannot be
 * read, the result of smpd_get_user_data_default() is returned instead.
 * A failure to close the registry key after a successful read yields
 * SMPD_FAIL.
 *
 * Elsewhere: smpd_get_smpd_data() is tried first, and
 * smpd_get_user_data_default() is used when it does not succeed.
 */
int smpd_get_user_data(const char *key, char *value, int value_len)
{
#ifdef HAVE_WINDOWS_H
    HKEY user_key;
    DWORD nbytes, status;
    char msg[512];

    smpd_enter_fn(FCNAME);

    status = RegOpenKeyEx(HKEY_CURRENT_USER, SMPD_REGISTRY_KEY, 0, KEY_READ, &user_key);
    if (status == ERROR_SUCCESS)
    {
        nbytes = value_len;
        status = RegQueryValueEx(user_key, key, 0, NULL, (unsigned char *)value, &nbytes);
        if (status == ERROR_SUCCESS)
        {
            status = RegCloseKey(user_key);
            if (status != ERROR_SUCCESS)
            {
                smpd_translate_win_error(status, msg, 512, "Unable to close the HKEY_CURRENT_USER\\" SMPD_REGISTRY_KEY " registry key, error %d: ", status);
                smpd_err_printf("%s\n", msg);
                smpd_exit_fn(FCNAME);
                return SMPD_FAIL;
            }
            smpd_exit_fn(FCNAME);
            return SMPD_SUCCESS;
        }

        /* the key is open but the value could not be read */
        smpd_translate_win_error(status, msg, 512, "Unable to read the smpd registry key '%s', error %d\n", key, status);
        smpd_dbg_printf("%s\n", msg);
        RegCloseKey(user_key);
    }
    else
    {
        smpd_translate_win_error(status, msg, 512, "Unable to open the HKEY_CURRENT_USER\\" SMPD_REGISTRY_KEY " registry key, error %d\n", status);
        smpd_dbg_printf("%s\n", msg);
    }

    /* registry lookup failed: fall back to the built-in default */
    status = smpd_get_user_data_default(key, value, value_len);
    smpd_exit_fn(FCNAME);
    return status;
#else
    int rc;

    smpd_enter_fn(FCNAME);
    rc = smpd_get_smpd_data(key, value, value_len);
    if (rc == SMPD_SUCCESS)
    {
        smpd_exit_fn(FCNAME);
        return rc;
    }
    rc = smpd_get_user_data_default(key, value, value_len);
    smpd_exit_fn(FCNAME);
    return rc;
#endif
}
Пример #3
0
/*
 * Unlink *context from smpd_process.sspi_context_list (if it is a member),
 * release it with MPIU_Free() and reset *context to NULL.
 *
 * A context that is not found in the list is still freed; a debug message
 * records that it was not a list member.  Always returns SMPD_SUCCESS.
 */
int smpd_free_sspi_client_context(smpd_sspi_client_context_t **context)
{
    /* points at the link (list head or a node's next field) being examined */
    smpd_sspi_client_context_t **link;

    smpd_enter_fn(FCNAME);

    link = &smpd_process.sspi_context_list;
    while (*link != NULL && *link != *context)
    {
        link = &(*link)->next;
    }

    if (*link != NULL)
    {
        /* found: splice the node out of the list */
        *link = (*link)->next;
    }
    else
    {
        smpd_dbg_printf("freeing a sspi_client_context not in the global list\n");
    }

    /* FIXME: cleanup sspi structures */
    MPIU_Free(*context);
    *context = NULL;
    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #4
0
/*
 * Release every node reachable from the spn list handle, then the handle
 * storage itself, and reset the caller's handle to NULL.
 *
 * Returns SMPD_FAIL when spn_list_hnd_p is NULL or the handle it points to
 * is not initialized (per SMPD_SPN_LIST_HND_IS_INIT), SMPD_SUCCESS otherwise.
 */
int smpd_spn_list_finalize(smpd_spn_list_hnd_t *spn_list_hnd_p)
{
    smpd_host_spn_node_t *node, *following;

    smpd_enter_fn(FCNAME);
    if (spn_list_hnd_p == NULL)
    {
        smpd_err_printf("Invalid pointer to spn list handle\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    if (!SMPD_SPN_LIST_HND_IS_INIT(*spn_list_hnd_p))
    {
        smpd_dbg_printf("Trying to finalize an uninitialized handle\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }

    /* Free the nodes; remember the successor before releasing each one */
    for (node = **spn_list_hnd_p; node != NULL; node = following)
    {
        following = node->next;
        MPIU_Free(node);
    }

    /* Free contents of the spn handle */
    MPIU_Free(*spn_list_hnd_p);
    *spn_list_hnd_p = NULL;

    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #5
0
/*
 * Apply the environment encoded in proc_encoded_env to a scheduler task.
 *
 * proc_encoded_env is consumed as a sequence of <name> <separator> <value>
 * strings via MPIU_Str_get_string(); parsing stops at the first string
 * that cannot be read or at an empty name/separator.  Each pair is
 * converted to wide characters and passed to
 * ptask->SetHpcEnvironmentVariable().
 *
 * Setting variables is best effort: a pair that cannot be converted to a
 * wide string is skipped, and a failing SetHpcEnvironmentVariable() call
 * is logged, but neither aborts the loop.
 *
 * Returns SMPD_FAIL if either argument is NULL, SMPD_SUCCESS otherwise.
 */
int smpd_hpc_js_task_setenv(ISchedulerTask *ptask, char *proc_encoded_env)
{
    char name[SMPD_MAX_ENV_LENGTH], equals[3], value[SMPD_MAX_ENV_LENGTH];
    wchar_t namew[SMPD_MAX_ENV_LENGTH], valuew[SMPD_MAX_ENV_LENGTH];
    HRESULT hr;

    smpd_enter_fn(FCNAME);
    if((ptask == NULL) || (proc_encoded_env == NULL)){
        smpd_err_printf("Invalid ptr to task or proc environment\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }

    for (;;){
        name[0] = '\0';
        equals[0] = '\0';
        value[0] = '\0';
        if (MPIU_Str_get_string(&proc_encoded_env, name, SMPD_MAX_ENV_LENGTH) != MPIU_STR_SUCCESS)
            break;
        if (name[0] == '\0')
            break;
        if (MPIU_Str_get_string(&proc_encoded_env, equals, 3) != MPIU_STR_SUCCESS)
            break;
        if (equals[0] == '\0')
            break;
        if (MPIU_Str_get_string(&proc_encoded_env, value, SMPD_MAX_ENV_LENGTH) != MPIU_STR_SUCCESS)
            break;
        smpd_dbg_printf("setting environment variable: <%s> = <%s>\n", name, value);

        /* mbstowcs() returns (size_t)-1 on an invalid multibyte sequence and
         * leaves the destination unusable; never hand such a buffer on. */
        if (mbstowcs(namew, name, SMPD_MAX_ENV_LENGTH) == (size_t)-1 ||
            mbstowcs(valuew, value, SMPD_MAX_ENV_LENGTH) == (size_t)-1){
            smpd_err_printf("Unable to convert environment variable <%s> to a wide string, skipping it\n", name);
            continue;
        }
        /* mbstowcs() does not terminate the output when it fills the buffer */
        namew[SMPD_MAX_ENV_LENGTH - 1] = L'\0';
        valuew[SMPD_MAX_ENV_LENGTH - 1] = L'\0';

        hr = ptask->SetHpcEnvironmentVariable(_bstr_t(namew), _bstr_t(valuew));
        if (FAILED(hr)){
            smpd_err_printf("Unable to set environment variable <%s> on the task, 0x%x\n", name, hr);
        }
    }
    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #6
0
/*
 * Tear down a job scheduler context: release the scheduler object it
 * holds (if any), free the context and reset the caller's handle to NULL.
 *
 * Returns SMPD_FAIL when pctxt itself is NULL.  A NULL handle (*pctxt) is
 * not an error; it is reported in the debug log and SMPD_SUCCESS is
 * returned.
 */
int smpd_hpc_js_finalize(smpd_hpc_js_ctxt_t *pctxt)
{
    smpd_hpc_js_ctxt_t ctxt;

    smpd_enter_fn(FCNAME);

    if(pctxt == NULL){
        smpd_err_printf("ERROR: Invalid pointer to js ctxt\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }

    ctxt = *pctxt;
    if(ctxt != NULL){
        /* Release the job scheduler object */
        if(ctxt->pscheduler){
            ctxt->pscheduler->Release();
            ctxt->pscheduler = NULL;
        }

        /* Free the job scheduler handle */
        MPIU_Free(ctxt);
        *pctxt = NULL;
    }
    else{
        smpd_dbg_printf("Null js handle\n");
    }

    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #7
0
/* A bomb thread can be used to guarantee that the service will exit when a stop command is processed */
void smpd_bomb_thread()
{
    if (WaitForSingleObject(smpd_process.hBombDiffuseEvent, (DWORD)10000) == WAIT_TIMEOUT)
    {
	smpd_dbg_printf("smpd_bomb_thread timed out, exiting.\n");
	ExitProcess((UINT)-1);
    }
}
Пример #8
0
/*
 * Write the fq_service_name of every node in the spn list to the debug
 * log, one line per node.
 *
 * Returns SMPD_FAIL if the handle is not initialized (per
 * SMPD_SPN_LIST_HND_IS_INIT), SMPD_SUCCESS otherwise.
 */
int smpd_spn_list_dbg_print(smpd_spn_list_hnd_t hnd)
{
    smpd_host_spn_node_t *node;

    smpd_enter_fn(FCNAME);

    if (!SMPD_SPN_LIST_HND_IS_INIT(hnd))
    {
        smpd_dbg_printf("Invalid handle to spn list\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }

    for (node = *hnd; node != NULL; node = node->next)
    {
        smpd_dbg_printf("FQ Service name = %s\n", node->fq_service_name);
    }

    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #9
0
/*
 * Trace hook called when leaving a function.
 *
 * Shortens the indent string by one character (when the current depth is
 * within 1..SMPD_MAX_INDENT-1), decrements the depth counter, and, if
 * SMPD_DBG_STATE_TRACE is set in smpd_process.dbg_state, logs "/<fcname>".
 * Always returns SMPD_SUCCESS.
 */
int smpd_exit_fn(char *fcname)
{
    /* drop the last indent marker before the depth goes down */
    if (0 < cur_indent && cur_indent < SMPD_MAX_INDENT)
    {
        indent[cur_indent - 1] = '\0';
    }
    --cur_indent;

    if ((smpd_process.dbg_state & SMPD_DBG_STATE_TRACE) != 0)
    {
        smpd_dbg_printf("/%s\n", fcname);
    }
    return SMPD_SUCCESS;
}
Пример #10
0
/*
 * Trace hook called when entering a function.
 *
 * If SMPD_DBG_STATE_TRACE is set in smpd_process.dbg_state, logs
 * "\<fcname>".  Then appends one '.' to the indent string (when the
 * current depth is within 0..SMPD_MAX_INDENT-1) and increments the depth
 * counter.  Always returns SMPD_SUCCESS.
 */
int smpd_enter_fn(char *fcname)
{
    if ((smpd_process.dbg_state & SMPD_DBG_STATE_TRACE) != 0)
    {
        smpd_dbg_printf("\\%s\n", fcname);
    }

    /* grow the indent string by one marker and re-terminate it */
    if (0 <= cur_indent && cur_indent < SMPD_MAX_INDENT)
    {
        indent[cur_indent] = '.';
        indent[cur_indent + 1] = '\0';
    }
    ++cur_indent;

    return SMPD_SUCCESS;
}
Пример #11
0
/*
 * Build a session header in `str` consisting of three integer arguments
 * added with MPIU_Str_add_int_arg(): "id" (session_id), "parent"
 * (smpd_process.id) and "level" (smpd_process.level + 1).  The space left
 * after the last argument is removed.
 *
 * The output budget is SMPD_MAX_SESSION_HEADER_LENGTH characters.
 * Returns SMPD_FAIL as soon as one of the fields cannot be added,
 * SMPD_SUCCESS otherwise.
 */
int smpd_generate_session_header(char *str, int session_id)
{
    char *header_start = str;
    int remaining = SMPD_MAX_SESSION_HEADER_LENGTH;

    smpd_enter_fn(FCNAME);

    *str = '\0';

    /* add header fields */
    if (MPIU_Str_add_int_arg(&str, &remaining, "id", session_id) != MPIU_STR_SUCCESS)
    {
        smpd_err_printf("unable to create session header, adding session id failed.\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    if (MPIU_Str_add_int_arg(&str, &remaining, "parent", smpd_process.id) != MPIU_STR_SUCCESS)
    {
        smpd_err_printf("unable to create session header, adding parent id failed.\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    if (MPIU_Str_add_int_arg(&str, &remaining, "level", smpd_process.level + 1) != MPIU_STR_SUCCESS)
    {
        smpd_err_printf("unable to create session header, adding session level failed.\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }

    /* remove the trailing space */
    *--str = '\0';

    smpd_dbg_printf("session header: (%s)\n", header_start);
    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #12
0
/*
 * Create a scheduler job and add one task per launch node.
 *
 * ctxt   - job scheduler context; must satisfy SMPDU_HPC_JS_CTXT_IS_VALID.
 * head   - first node of the launch list; the list is walked via ->next.
 * pp_job - receives the job created by the scheduler's CreateJob().
 *
 * For every launch node a task is created, its environment is set with
 * smpd_hpc_js_task_setenv(), its command line is set from head->exe, and
 * its working directory and stdout/stderr file paths are set before the
 * task is added to the job.
 *
 * Returns SMPD_FAIL on invalid arguments or when CreateJob, CreateTask,
 * smpd_hpc_js_task_setenv, put_CommandLine or AddTask fails.  Failures to
 * set the job name, working directory or stdout/stderr paths are only
 * logged.  On the failure returns inside the loop, *pp_job has already
 * been written and the current task is not released here.
 */
int smpd_hpc_js_create_job(smpd_hpc_js_ctxt_t ctxt, smpd_launch_node_t *head, ISchedulerJob **pp_job)
{
    int result;
    HRESULT hr;
    ISchedulerJob *pjob;

    smpd_enter_fn(FCNAME);
    /* Validate all three arguments before touching the scheduler */
    if(!SMPDU_HPC_JS_CTXT_IS_VALID(ctxt)){
        smpd_err_printf("ERROR: Invalid handle to hpc job scheduler\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    if(head == NULL){
        smpd_err_printf("ERROR: Invalid list of launch nodes\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    if(pp_job == NULL){
        smpd_err_printf("ERROR: Invalid ptr to ptr to job object\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    
    /* The job is written directly into the caller's pointer */
    hr = (ctxt->pscheduler)->CreateJob(pp_job);
    if(FAILED(hr)){
        smpd_err_printf("ERROR: Creating job failed, 0x%x\n", hr);
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    
    pjob = *pp_job;

    /* Set the properties of the job */
    hr = pjob->put_Name(_bstr_t(L"MPICH_JOB"));
    if(FAILED(hr)){
        /* Not fatal: the job simply keeps whatever name it had */
        smpd_dbg_printf("Unable to set the name of the job\n");
    }

    /* One task per launch node */
    while(head){
        /* FIXME: We are not releasing these tasks allocated here */
        ISchedulerTask *ptask = NULL;
        wchar_t exe_namew[SMPD_MAX_EXE_LENGTH], wdir[SMPD_MAX_DIR_LENGTH];
        wchar_t filename[SMPD_MAX_EXE_LENGTH];

        hr = pjob->CreateTask(&ptask);
        if(FAILED(hr)){
            smpd_err_printf("ERROR: Creating task failed, 0x%x\n", hr);
            smpd_exit_fn(FCNAME);
            return SMPD_FAIL;
        }

        /* Environment comes from the node's encoded env_data string */
        result = smpd_hpc_js_task_setenv(ptask, head->env_data);
        if(result != SMPD_SUCCESS){
            smpd_err_printf("Unable to add env variable to task\n");
            smpd_exit_fn(FCNAME);
            return SMPD_FAIL;
        }
        /* The scheduler API takes wide strings, so convert the command line */
        mbstowcs(exe_namew, head->exe, SMPD_MAX_EXE_LENGTH);
        hr = ptask->put_CommandLine(_bstr_t(exe_namew));
        if(FAILED(hr)){
            smpd_err_printf("ERROR: Adding command to task failed, 0x%x\n", hr);
            smpd_exit_fn(FCNAME);
            return SMPD_FAIL;
        }

        /* Set the task properties */
        mbstowcs(wdir, head->dir, SMPD_MAX_DIR_LENGTH);
        hr = ptask->put_WorkDirectory(_bstr_t(wdir));
        if(FAILED(hr)){
            /* Logged only; the task is still added to the job */
            smpd_err_printf("ERROR: Unable to set the working directory for job\n");
        }

        /* Output file names embed the command line and the node's iproc/nproc */
        _snwprintf_s(filename, SMPD_MAX_EXE_LENGTH, SMPD_MAX_EXE_LENGTH - 1, L"stdout_mpich_%s_%d_%d.txt", exe_namew, head->iproc, head->nproc);
        hr = ptask->put_StdOutFilePath(_bstr_t(filename));
        if(FAILED(hr)){
            smpd_err_printf("ERROR: Unable to set the stdout file path\n");
        }

        _snwprintf_s(filename, SMPD_MAX_EXE_LENGTH, SMPD_MAX_EXE_LENGTH - 1, L"stderr_mpich_%s_%d_%d.txt", exe_namew, head->iproc, head->nproc);
        hr = ptask->put_StdErrFilePath(_bstr_t(filename));
        if(FAILED(hr)){
            smpd_err_printf("ERROR: Unable to set the stderr file path\n");
        }

        hr = pjob->AddTask(ptask);
        if(FAILED(hr)){
            smpd_err_printf("ERROR: Adding task to job failed, 0x%x\n", hr);
            smpd_exit_fn(FCNAME);
            return SMPD_FAIL;
        }
        head = head->next;
    }

    /* Set the number of cores required for the job */
    /* FIXME: We are not using the node collection right now */

    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #13
0
/*
 * Look up the smpd setting `key` and copy it into `value` (capacity
 * `value_len` bytes).
 *
 * Lookup order:
 *   1. smpd_get_smpd_data_from_environment()
 *   2. Windows: SMPD_REGISTRY_KEY under HKEY_LOCAL_MACHINE;
 *      elsewhere: the entries returned by smpd_parse_smpd_file()
 *      (when several entries share the key, the last one wins)
 *   3. smpd_get_smpd_data_default()
 *
 * On the non-Windows path the stored value is copied with truncation so
 * that at most value_len bytes, including the terminator, are written.
 *
 * Returns SMPD_SUCCESS when a value was produced; otherwise SMPD_FAIL on
 * Windows, or the result of smpd_get_smpd_data_default() elsewhere.
 */
int smpd_get_smpd_data(const char *key, char *value, int value_len)
{
#ifdef HAVE_WINDOWS_H
    HKEY tkey;
    DWORD len, result;
    char err_msg[512];

    smpd_enter_fn(FCNAME);

    if (smpd_get_smpd_data_from_environment(key, value, value_len) == SMPD_TRUE)
    {
	smpd_exit_fn(FCNAME);
	return SMPD_SUCCESS;
    }

    result = RegOpenKeyEx(HKEY_LOCAL_MACHINE, SMPD_REGISTRY_KEY, 0, KEY_READ, &tkey);
    if (result != ERROR_SUCCESS)
    {
	/* no registry key at all: only the built-in default can help */
	if (smpd_get_smpd_data_default(key, value, value_len) != SMPD_SUCCESS)
	{
	    smpd_dbg_printf("Unable to get the data for the key '%s'\n", key);
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	smpd_exit_fn(FCNAME);
	return SMPD_SUCCESS;
    }

    len = value_len;
    result = RegQueryValueEx(tkey, key, 0, NULL, (unsigned char *)value, &len);
    if (result != ERROR_SUCCESS)
    {
	RegCloseKey(tkey);
	if (smpd_get_smpd_data_default(key, value, value_len) != SMPD_SUCCESS)
	{
	    smpd_dbg_printf("Unable to get the data for the key '%s'\n", key);
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	smpd_exit_fn(FCNAME);
	return SMPD_SUCCESS;
    }

    result = RegCloseKey(tkey);
    if (result != ERROR_SUCCESS)
    {
	smpd_translate_win_error(result, err_msg, 512, "Unable to close the HKEY_LOCAL_MACHINE\\" SMPD_REGISTRY_KEY " registry key, error %d: ", result);
	smpd_err_printf("%s\n", err_msg);
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }

    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
#else
    int result;
    smpd_data_t *list = NULL, *node;
    int found = 0;

    smpd_enter_fn(FCNAME);

    smpd_dbg_printf("getting smpd data: %s\n", key);

    if (smpd_get_smpd_data_from_environment(key, value, value_len) == SMPD_TRUE)
    {
	smpd_exit_fn(FCNAME);
	return SMPD_SUCCESS;
    }

    list = smpd_parse_smpd_file();

    /* Walk the whole list even after a match: every node must be freed */
    while (list)
    {
	node = list;
	list = list->next;
	if (value_len > 0 && strcmp(key, node->name) == 0)
	{
	    /* bounded copy: never write more than value_len bytes */
	    strncpy(value, node->value, (size_t)value_len - 1);
	    value[value_len - 1] = '\0';
	    smpd_dbg_printf("smpd data: %s=%s\n", key, value);
	    found = 1;
	}
	MPIU_Free(node);
    }
    if (found)
    {
	smpd_exit_fn(FCNAME);
	return SMPD_SUCCESS;
    }

    result = smpd_get_smpd_data_default(key, value, value_len);
    if (result == SMPD_SUCCESS)
    {
	smpd_dbg_printf("smpd data: %s=%s\n", key, value);
    }
    else
    {
	smpd_dbg_printf("smpd data: failed to get %s\n", key);
    }

    smpd_exit_fn(FCNAME);
    return result;
#endif
}
Пример #14
0
/*
 * Build a host list from the CCP_NODES environment variable.
 *
 * CCP_NODES = <NUM_OF_HOSTS> <HOST1> <NP_HOST1> <HOST2> <NP_HOST2> ...
 * (tokens separated by spaces, commas, tabs or newlines).
 *
 * np              - receives the sum of all per-host process counts;
 *                   set to -1 until parsing has completed successfully.
 * host_node_ptr_p - receives the head of the newly allocated host list;
 *                   nodes are appended in the order they appear.
 *
 * Returns SMPD_FAIL if an argument is NULL, CCP_NODES is not set or
 * cannot be duplicated, the list is malformed, or a node cannot be
 * allocated.  On a failure in the middle of the list, the nodes created
 * so far remain linked from *host_node_ptr_p.
 */
int smpd_get_ccp_nodes(int *np, smpd_host_node_t **host_node_ptr_p)
{
    smpd_host_node_t *host_node_ptr=NULL, **host_list_tail_p=NULL;
    int smpd_node_cnt = 0, np_total, i;
    char *p=NULL, *tok=NULL, *next_tok=NULL;
    const char *ccp_nodes_env=NULL;
    char seps[] = " ,\t\n";
    
    smpd_enter_fn(FCNAME);
    if(np == NULL){
        smpd_err_printf("Error: Pointer to num procs is NULL\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    if(host_node_ptr_p == NULL){
        smpd_err_printf("Error: Invalid pointer to host node ptr\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    host_list_tail_p = host_node_ptr_p;
    
    *np = -1;
    np_total = 0;

    /* getenv() returns NULL when the variable is not set; check that before
     * duplicating, since the duplicate is what the tokenizer modifies. */
    ccp_nodes_env = getenv("CCP_NODES");
    if(ccp_nodes_env == NULL){
        smpd_err_printf("Error: Unable to get the list of CCP nodes\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    p = MPIU_Strdup(ccp_nodes_env);
    if(p == NULL){
        smpd_err_printf("Error: Unable to get the list of CCP nodes\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    smpd_dbg_printf("CCP_NODES = %s\n", p);

    /* First token is the number of <host, nproc> pairs that follow */
    tok = strtok_s(p, seps, &next_tok);

    if(tok == NULL){
        smpd_err_printf("Error: Unable to parse th list of CCP nodes\n");
        MPIU_Free(p);
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }
    
    smpd_node_cnt = atoi(tok);
    for(i=0; i<smpd_node_cnt; i++){
        char *host;
        int np_host;

        host = strtok_s(NULL, seps, &next_tok);
        if(host == NULL){
            smpd_err_printf("Error: Unable to parse the list of CCP nodes\n");
            MPIU_Free(p);
            smpd_exit_fn(FCNAME);
            return SMPD_FAIL;
        }
        tok = strtok_s(NULL, seps, &next_tok);
        if(tok == NULL){
            smpd_err_printf("Error: Unable to parse the list of CCP nodes\n");
            MPIU_Free(p);
            smpd_exit_fn(FCNAME);
            return SMPD_FAIL;
        }
        np_host = atoi(tok);
        np_total += np_host;
        /* Allocate memory & set node name */
        host_node_ptr = (smpd_host_node_t *)MPIU_Malloc(sizeof(smpd_host_node_t));
        if(host_node_ptr == NULL){
            smpd_err_printf("Unable to allocate memory for smpd host node \n");
            MPIU_Free(p);
            smpd_exit_fn(FCNAME);
            return SMPD_FAIL;
        }
        host_node_ptr->next = NULL;
        host_node_ptr->left = NULL;
        host_node_ptr->right = NULL;
        host_node_ptr->connected = SMPD_FALSE;
        host_node_ptr->connect_cmd_tag = -1;
        host_node_ptr->nproc = np_host;
        host_node_ptr->alt_host[0] = '\0';

        MPIU_Strncpy(host_node_ptr->host, host, SMPD_MAX_HOST_LENGTH);

        /* Add the node to the tail of the list */
        *host_list_tail_p = host_node_ptr;
        host_list_tail_p = &(host_node_ptr->next);
    }

    *np = np_total;
    MPIU_Free(p);
    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #15
0
/*
 * Launch every process in smpd_process.launch_list through a remote shell
 * command (or locally when smpd_process.mpiexec_run_local is set) and run
 * the smpd state machine until it returns.
 *
 * Environment variables read:
 *   MPIEXEC_RSH           - replaces the default "ssh -x" launch command
 *                           (truncated to fit the 100-byte command buffer).
 *   MPIEXEC_RSH_NO_ESCAPE - when affirmative or "1", backslashes in the
 *                           executable string are not doubled.
 *
 * Steps:
 *   1. Create a socket set and start either the local PMI server
 *      (smpd_process.use_pmi_server) or a root smpd plus an abort
 *      connection to it.
 *   2. For each launch node, create a process structure, build its command
 *      line / environment (PMI_RANK, PMI_SIZE, PMI_KVS, PMI_ROOT_HOST,
 *      PMI_ROOT_PORT, PMI_ROOT_LOCAL, PMI_APPNUM) and launch it.  Stdin is
 *      redirected to the first process.
 *   3. If smpd_process.timeout > 0, arm the timeout mechanism available on
 *      the platform.
 *   4. Enter the state machine at SMPD_IDLE, then stop the PMI server or
 *      send "abort" to the root smpd and stop it.
 *
 * Returns 0 on success and SMPD_FAIL on any failure.
 */
int mpiexec_rsh()
{
    int i;
    smpd_launch_node_t *launch_node_ptr;
    smpd_process_t *process, **processes;
    int result;
    char *iter1, *iter2;
    char exe[SMPD_MAX_EXE_LENGTH];
    char *exe_end;
    char *p;
    char ssh_cmd[100] = "ssh -x";
    SMPDU_Sock_set_t set;
    SMPD_BOOL escape_escape = SMPD_TRUE;
    char *env_str;
    int maxlen;
    SMPDU_Sock_t abort_sock;
    smpd_context_t *abort_context = NULL;
    smpd_command_t *cmd_ptr;
    PROCESS_HANDLE_TYPE hnd;

    smpd_enter_fn("mpiexec_rsh");

#ifdef HAVE_WINDOWS_H
    SetConsoleCtrlHandler(mpiexec_rsh_handler, TRUE);
#else
    /* setup a signall hander? */
#endif

    p = getenv("MPIEXEC_RSH");
    if (p != NULL && strlen(p) > 0){
        /* strncpy() does not terminate the destination when the source is
         * too long, so copy one byte less and terminate explicitly. */
        strncpy(ssh_cmd, p, sizeof(ssh_cmd) - 1);
        ssh_cmd[sizeof(ssh_cmd) - 1] = '\0';
    }

    p = getenv("MPIEXEC_RSH_NO_ESCAPE");
    if (p != NULL){
        if (smpd_is_affirmative(p) || strcmp(p, "1") == 0){
            escape_escape = SMPD_FALSE;
        }
    }

    result = SMPDU_Sock_create_set(&set);
    if (result != SMPD_SUCCESS){
        smpd_err_printf("unable to create a set for the mpiexec_rsh.\n");
        smpd_exit_fn("mpiexec_rsh");
        return SMPD_FAIL;
    }

    smpd_process.nproc = smpd_process.launch_list->nproc;

    if (smpd_process.use_pmi_server){
        result = start_pmi_server(smpd_process.launch_list->nproc, root_host, 100, &root_port);
        if (result != SMPD_SUCCESS){
            smpd_err_printf("mpiexec_rsh is unable to start the local pmi server.\n");
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }
        smpd_dbg_printf("the pmi server is listening on %s:%d\n", root_host, root_port);
    }
    else{
        /* start the root smpd */
        result = start_root_smpd(root_host, SMPD_MAX_HOST_LENGTH, &root_port, &hnd);
        if (result != SMPD_SUCCESS){
            smpd_err_printf("mpiexec_rsh is unable to start the root smpd.\n");
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }
        smpd_dbg_printf("the root smpd is listening on %s:%d\n", root_host, root_port);

        /* create a connection to the root smpd used to abort the job */
        result = ConnectToHost(root_host, root_port, SMPD_CONNECTING_RPMI, set, &abort_sock, &abort_context);
        if (result != SMPD_SUCCESS){
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }
    }

    /* NOTE(review): this array is filled below but never read or freed in
     * this function - confirm whether it is still needed. */
    processes = (smpd_process_t**)MPIU_Malloc(sizeof(smpd_process_t*) * smpd_process.launch_list->nproc);
    if (processes == NULL){
        smpd_err_printf("unable to allocate process array.\n");
        smpd_exit_fn("mpiexec_rsh");
        return SMPD_FAIL;
    }

    launch_node_ptr = smpd_process.launch_list;
    for (i=0; i<smpd_process.launch_list->nproc; i++){
        if (launch_node_ptr == NULL){
            smpd_err_printf("Error: not enough launch nodes.\n");
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }

        /* initialize process structure */
        result = smpd_create_process_struct(i, &process);
        if (result != SMPD_SUCCESS){
            smpd_err_printf("unable to create a process structure.\n");
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }
        /* no need for a pmi context */
        if (process->pmi){
            smpd_free_context(process->pmi);
        }
        process->pmi = NULL;
        /* change stdout and stderr to rsh behavior:
         * write stdout/err directly to stdout/err instead of creating
         * an smpd stdout/err command
         */
        if (process->out != NULL){
            process->out->type = SMPD_CONTEXT_STDOUT_RSH;
        }
        if (process->err != NULL){
            process->err->type = SMPD_CONTEXT_STDERR_RSH;
        }
        MPIU_Strncpy(process->clique, launch_node_ptr->clique, SMPD_MAX_CLIQUE_LENGTH);
        MPIU_Strncpy(process->dir, launch_node_ptr->dir, SMPD_MAX_DIR_LENGTH);
        MPIU_Strncpy(process->domain_name, smpd_process.kvs_name, SMPD_MAX_DBS_NAME_LEN);
        MPIU_Strncpy(process->env, launch_node_ptr->env, SMPD_MAX_ENV_LENGTH);
        if (escape_escape == SMPD_TRUE && smpd_process.mpiexec_run_local != SMPD_TRUE){
            /* convert \ to \\ to make cygwin ssh happy.
             * Doubling can grow the string, so stop (truncating the result)
             * before the output would run past the end of exe. */
            iter1 = launch_node_ptr->exe;
            iter2 = exe;
            exe_end = exe + SMPD_MAX_EXE_LENGTH - 1; /* keep room for '\0' */
            while (*iter1 && iter2 < exe_end){
                if (*iter1 == '\\'){
                    if (iter2 + 1 >= exe_end){
                        /* no room for both backslashes */
                        break;
                    }
                    *iter2 = *iter1;
                    iter2++;
                }
                *iter2 = *iter1;
                iter1++;
                iter2++;
            }
            *iter2 = '\0';
            /*printf("[%s] -> [%s]\n", launch_node_ptr->exe, exe);*/
        }
        else{
            MPIU_Strncpy(exe, launch_node_ptr->exe, SMPD_MAX_EXE_LENGTH);
        }

        /* Two samples for testing on the local machine */

        /* static rPMI initialization */
        /*sprintf(process->exe, "env PMI_RANK=%d PMI_SIZE=%d PMI_KVS=%s PMI_ROOT_HOST=%s PMI_ROOT_PORT=8888 PMI_ROOT_LOCAL=1 PMI_APPNUM=%d %s",
            launch_node_ptr->iproc, launch_node_ptr->nproc, smpd_process.kvs_name, root_host, launch_node_ptr->appnum, exe);*/

        /* dynamic rPMI initialization */
        /*sprintf(process->exe, "env PMI_RANK=%d PMI_SIZE=%d PMI_KVS=%s PMI_ROOT_HOST=%s PMI_ROOT_PORT=%d PMI_ROOT_LOCAL=0 PMI_APPNUM=%d %s",
            launch_node_ptr->iproc, launch_node_ptr->nproc, smpd_process.kvs_name, root_host, root_port, launch_node_ptr->appnum, exe);*/

        if (smpd_process.mpiexec_run_local == SMPD_TRUE){
            /* -localonly option and dynamic rPMI initialization */
            env_str = &process->env[strlen(process->env)];
            maxlen = (int)(SMPD_MAX_ENV_LENGTH - strlen(process->env));
            MPIU_Str_add_int_arg(&env_str, &maxlen, "PMI_RANK", launch_node_ptr->iproc);
            MPIU_Str_add_int_arg(&env_str, &maxlen, "PMI_SIZE", launch_node_ptr->nproc);
            MPIU_Str_add_string_arg(&env_str, &maxlen, "PMI_KVS", smpd_process.kvs_name);
            MPIU_Str_add_string_arg(&env_str, &maxlen, "PMI_ROOT_HOST", root_host);
            MPIU_Str_add_int_arg(&env_str, &maxlen, "PMI_ROOT_PORT", root_port);
            MPIU_Str_add_string_arg(&env_str, &maxlen, "PMI_ROOT_LOCAL", "0");
            MPIU_Str_add_int_arg(&env_str, &maxlen, "PMI_APPNUM", launch_node_ptr->appnum);
            MPIU_Strncpy(process->exe, exe, SMPD_MAX_EXE_LENGTH);
        }
        else{
            /* ssh and dynamic rPMI initialization:
             * "<ssh_cmd> <hostname> env [user env] <PMI vars> <exe>" */
            char fmtEnv[SMPD_MAX_ENV_LENGTH];
            int fmtEnvLen = SMPD_MAX_ENV_LENGTH;
            char *pExe = process->exe;
            int curLen = 0;
            MPIU_Snprintf(pExe, SMPD_MAX_EXE_LENGTH, "%s %s env", ssh_cmd, launch_node_ptr->hostname);
            curLen = strlen(process->exe);
            pExe = process->exe + curLen;
            if(FmtEnvVarsForSSH(launch_node_ptr->env, fmtEnv, fmtEnvLen)){
                /* Add user specified env vars */
                MPIU_Snprintf(pExe, SMPD_MAX_EXE_LENGTH - curLen, "%s", fmtEnv);
                curLen = strlen(process->exe);
                pExe = process->exe + curLen;
            }
            MPIU_Snprintf(pExe, SMPD_MAX_EXE_LENGTH - curLen, " \"PMI_RANK=%d\" \"PMI_SIZE=%d\" \"PMI_KVS=%s\" \"PMI_ROOT_HOST=%s\" \"PMI_ROOT_PORT=%d\" \"PMI_ROOT_LOCAL=0\" \"PMI_APPNUM=%d\" %s",
                launch_node_ptr->iproc, launch_node_ptr->nproc, smpd_process.kvs_name, root_host, root_port, launch_node_ptr->appnum, exe);
        }

        MPIU_Strncpy(process->kvs_name, smpd_process.kvs_name, SMPD_MAX_DBS_NAME_LEN);
        process->nproc = launch_node_ptr->nproc;
        MPIU_Strncpy(process->path, launch_node_ptr->path, SMPD_MAX_PATH_LENGTH);

        /* call smpd_launch_process */
        smpd_dbg_printf("launching: %s\n", process->exe);
        result = smpd_launch_process(process, SMPD_DEFAULT_PRIORITY_CLASS, SMPD_DEFAULT_PRIORITY, SMPD_FALSE, set);
        if (result != SMPD_SUCCESS){
            smpd_err_printf("unable to launch process %d <%s>.\n", i, process->exe);
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }
        /* save the new process in the list */
        process->next = smpd_process.process_list;
        smpd_process.process_list = process;
        if (i == 0){
            /* start the stdin redirection thread to the first process */
            setup_stdin_redirection(process, set);
        }

        smpd_process.nproc_launched++;
        processes[i] = process;
        launch_node_ptr = launch_node_ptr->next;
    } /* for (i=0; i<smpd_process.launch_list->nproc; i++) */

    if (launch_node_ptr != NULL){
        smpd_err_printf("Error: too many launch nodes.\n");
        smpd_exit_fn("mpiexec_rsh");
        return SMPD_FAIL;
    }

    /* Start the timeout mechanism if specified */
    if (smpd_process.timeout > 0){
        smpd_context_t *reader_context;
        SMPDU_Sock_t sock_reader;
        SMPDU_SOCK_NATIVE_FD reader, writer;
#ifdef HAVE_WINDOWS_H
        /*SOCKET reader, writer;*/
        smpd_make_socket_loop((SOCKET*)&reader, (SOCKET*)&writer);
#else
        /*int reader, writer;*/
        int pair[2];
        socketpair(AF_UNIX, SOCK_STREAM, 0, pair);
        reader = pair[0];
        writer = pair[1];
#endif
        /* The reader end is watched by the state machine; the writer end is
         * stored in smpd_process.timeout_sock. */
        result = SMPDU_Sock_native_to_sock(set, reader, NULL, &sock_reader);
        result = SMPDU_Sock_native_to_sock(set, writer, NULL, &smpd_process.timeout_sock);
        result = smpd_create_context(SMPD_CONTEXT_TIMEOUT, set, sock_reader, -1, &reader_context);
        reader_context->read_state = SMPD_READING_TIMEOUT;
        result = SMPDU_Sock_post_read(sock_reader, &reader_context->read_cmd.cmd, 1, 1, NULL);
#ifdef HAVE_WINDOWS_H
        /* create a Windows thread to sleep until the timeout expires */
        smpd_process.timeout_thread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)timeout_thread, NULL, 0, NULL);
        if (smpd_process.timeout_thread == NULL){
            printf("Error: unable to create a timeout thread, errno %d.\n", GetLastError());
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }
#else /* HAVE_WINDOWS_H */
#ifdef SIGALRM
        /* create an alarm to signal mpiexec when the timeout expires */
        smpd_signal(SIGALRM, timeout_function);
        alarm(smpd_process.timeout);
#else /* SIGALARM */
#ifdef HAVE_PTHREAD_H
        /* create a pthread to sleep until the timeout expires */
        result = pthread_create(&smpd_process.timeout_thread, NULL, timeout_thread, NULL);
        if (result != 0){
            printf("Error: unable to create a timeout thread, errno %d.\n", result);
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }
#else /* HAVE_PTHREAD_H */
        /* no timeout mechanism available */
#endif /* HAVE_PTHREAD_H */
#endif /* SIGALARM */
#endif /* HAVE_WINDOWS_H */
    } /* if (smpd_process.timeout > 0) */

    result = smpd_enter_at_state(set, SMPD_IDLE);
    if (result != SMPD_SUCCESS){
        smpd_err_printf("mpiexec_rsh state machine failed.\n");
        smpd_exit_fn("mpiexec_rsh");
        return SMPD_FAIL;
    }

    if (smpd_process.use_pmi_server){
        result = stop_pmi_server();
        if (result != SMPD_SUCCESS){
            smpd_err_printf("mpiexec_rsh unable to stop the pmi server.\n");
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }
    }
    else{
        /* Send an abort command to the root_smpd thread/process to insure that it exits.
         * This only needs to be sent when there is an error or failed process of some sort
         * but it is safe to send it in all cases.
         */
        result = smpd_create_command("abort", 0, 0, SMPD_FALSE, &cmd_ptr);
        if (result != SMPD_SUCCESS){
            smpd_err_printf("unable to create an abort command.\n");
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }
        result = smpd_post_write_command(abort_context, cmd_ptr);
        if (result != SMPD_SUCCESS){
            /* Only print this as a debug message instead of an error because the root_smpd thread/process may have already exited. */
            smpd_dbg_printf("unable to post a write of the abort command to the %s context.\n", smpd_get_context_str(abort_context));
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }

        result = stop_root_smpd(hnd);
        if (result != PMI_SUCCESS){
            smpd_err_printf("mpiexec_rsh unable to stop the root smpd.\n");
            smpd_exit_fn("mpiexec_rsh");
            return SMPD_FAIL;
        }
    }

    smpd_exit_fn("mpiexec_rsh");
    return 0;
}
Пример #16
0
/*
 * Store the smpd setting `key` = `value`.
 *
 * Windows: the value is written as a REG_SZ under SMPD_REGISTRY_KEY in
 * HKEY_LOCAL_MACHINE (the key is created if necessary).
 *
 * Elsewhere: the entries returned by smpd_parse_smpd_file() are written
 * back to the file opened by smpd_open_smpd_file(SMPD_TRUE), with the
 * value of every entry named `key` replaced by `value`; if no entry has
 * that name a new one is appended.  Entries that cannot be formatted into
 * the 1024-byte line buffer are not written.
 *
 * Returns SMPD_FAIL when key or value is NULL, or when the registry key /
 * file cannot be opened, written or closed; SMPD_SUCCESS otherwise.
 */
int smpd_set_smpd_data(const char *key, const char *value)
{
#ifdef HAVE_WINDOWS_H
    HKEY tkey;
    DWORD len, result;
    char err_msg[512];

    smpd_enter_fn(FCNAME);

    if (key == NULL || value == NULL)
    {
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }

    result = RegCreateKeyEx(HKEY_LOCAL_MACHINE, SMPD_REGISTRY_KEY,
	0, NULL, REG_OPTION_NON_VOLATILE, KEY_ALL_ACCESS, NULL, &tkey, NULL);
    if (result != ERROR_SUCCESS)
    {
	smpd_translate_win_error(result, err_msg, 512, "Unable to open the HKEY_LOCAL_MACHINE\\" SMPD_REGISTRY_KEY " registry key, error %d\n", result);
	smpd_err_printf("%s\n", err_msg);
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }

    /* REG_SZ length is in bytes and includes the terminating null */
    len = (DWORD)(strlen(value)+1);
    result = RegSetValueEx(tkey, key, 0, REG_SZ, (const BYTE *)value, len);
    if (result != ERROR_SUCCESS)
    {
	smpd_translate_win_error(result, err_msg, 512, "Unable to write the smpd registry value '%s:%s', error %d\n", key, value, result);
	smpd_err_printf("%s\n", err_msg);
	RegCloseKey(tkey);
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }

    result = RegCloseKey(tkey);
    if (result != ERROR_SUCCESS)
    {
	smpd_translate_win_error(result, err_msg, 512, "Unable to close the HKEY_LOCAL_MACHINE\\" SMPD_REGISTRY_KEY " registry key, error %d: ", result);
	smpd_err_printf("%s\n", err_msg);
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }

    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
#else
    smpd_data_t *list = NULL, *node;
    int found = 0;
    FILE *fout;
    char *str;
    int maxlen;
    char buffer[1024];

    smpd_enter_fn(FCNAME);

    /* same argument check as the Windows branch */
    if (key == NULL || value == NULL)
    {
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }

    smpd_dbg_printf("setting smpd data: %s=%s\n", key, value);

    /* read the existing entries before the file is opened for writing */
    list = smpd_parse_smpd_file();
    fout = smpd_open_smpd_file(SMPD_TRUE);
    if (fout == NULL)
    {
	/* nothing will be written: release the parsed entries */
	while (list)
	{
	    node = list;
	    list = list->next;
	    MPIU_Free(node);
	}
	smpd_err_printf("Unable to open the .smpd file\n");
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }

    while (list)
    {
	const char *out_value;

	node = list;
	list = list->next;

	/* Write the new value directly for a matching entry instead of
	 * copying it into the node, whose buffer size is not known here. */
	out_value = node->value;
	if (strcmp(key, node->name) == 0)
	{
	    out_value = value;
	    found = 1;
	}

	str = buffer;
	maxlen = 1024;
	if (MPIU_Str_add_string_arg(&str, &maxlen, node->name, out_value) == MPIU_STR_SUCCESS)
	{
	    buffer[strlen(buffer)-1] = '\0'; /* remove the trailing space */
	    smpd_dbg_printf("writing '%s' to .smpd file\n", buffer);
	    fprintf(fout, "%s\n", buffer);
	}
	MPIU_Free(node);
    }

    if (!found)
    {
	/* the key was not present: append it */
	str = buffer;
	maxlen = 1024;
	if (MPIU_Str_add_string_arg(&str, &maxlen, key, value) == MPIU_STR_SUCCESS)
	{
	    buffer[strlen(buffer)-1] = '\0'; /* remove the trailing space */
	    smpd_dbg_printf("writing '%s' to .smpd file\n", buffer);
	    fprintf(fout, "%s\n", buffer);
	}
    }

    fclose(fout);
    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
#endif
}
Пример #17
0
/*
 * Append one "flag<DELIM>value<SEPAR>" argument to the string being built.
 *
 *   str_ptr    - in/out: current write position; moved past the appended
 *                argument on success.
 *   maxlen_ptr - in/out: number of bytes still available at *str_ptr; reduced
 *                by the number of bytes consumed.
 *   flag, val  - the two halves of the argument.  Each one is written with
 *                quoted_printf() when it contains a space, contains
 *                SMPD_DELIM_STR or starts with SMPD_QUOTE_CHAR; otherwise it
 *                is copied verbatim.
 *
 * Returns SMPD_SUCCESS when the whole argument fit.  Returns SMPD_FAIL when
 * there was no room at all, or when only part of the argument could be stored
 * (the partial text is NUL terminated in that case).
 */
int smpd_add_string_arg(char **str_ptr, int *maxlen_ptr, const char *flag, const char *val)
{
    int num_chars;
    char *arg_start;

    smpd_enter_fn(FCNAME);
    if (*maxlen_ptr < 1)
    {
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }
    /* remembered only so the debug output can show the partial argument */
    arg_start = *str_ptr;

    /* add the flag */
    if (strstr(flag, " ") || strstr(flag, SMPD_DELIM_STR) || flag[0] == SMPD_QUOTE_CHAR)
    {
	num_chars = quoted_printf(*str_ptr, *maxlen_ptr, flag);
    }
    else
    {
	num_chars = snprintf(*str_ptr, *maxlen_ptr, "%s", flag);
    }
    /* On truncation snprintf reports the length it wanted to write (C99) or a
     * negative value (older C runtimes).  Never account for more bytes than
     * really exist, otherwise the terminator below lands outside the buffer. */
    if (num_chars < 0 || num_chars > *maxlen_ptr)
    {
	num_chars = *maxlen_ptr;
    }
    *maxlen_ptr = *maxlen_ptr - num_chars;
    if (*maxlen_ptr < 1)
    {
	/* the flag used up the whole buffer: terminate in the last byte */
	(*str_ptr)[num_chars-1] = '\0';
	smpd_dbg_printf("partial argument added to string: '%s'\n", arg_start);
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }
    *str_ptr = *str_ptr + num_chars;

    /* add the deliminator character */
    **str_ptr = SMPD_DELIM_CHAR;
    *str_ptr = *str_ptr + 1;
    *maxlen_ptr = *maxlen_ptr - 1;

    /* add the value string */
    if (strstr(val, " ") || strstr(val, SMPD_DELIM_STR) || val[0] == SMPD_QUOTE_CHAR)
    {
	num_chars = quoted_printf(*str_ptr, *maxlen_ptr, val);
    }
    else
    {
	num_chars = snprintf(*str_ptr, *maxlen_ptr, "%s", val);
    }
    /* same clamping as for the flag; *maxlen_ptr may legitimately be 0 here */
    if (num_chars < 0 || num_chars > *maxlen_ptr)
    {
	num_chars = *maxlen_ptr;
    }
    *str_ptr = *str_ptr + num_chars;
    *maxlen_ptr = *maxlen_ptr - num_chars;
    if (*maxlen_ptr < 2)
    {
	/* no room for the separator and the terminator: give up the last byte
	 * written (at worst the delimiter) and terminate there */
	*str_ptr = *str_ptr - 1;
	**str_ptr = '\0';
	smpd_dbg_printf("partial argument added to string: '%s'\n", arg_start);
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }
    
    /* add the trailing space */
    **str_ptr = SMPD_SEPAR_CHAR;
    *str_ptr = *str_ptr + 1;
    **str_ptr = '\0';
    *maxlen_ptr = *maxlen_ptr - 1;

    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #18
0
int smpd_get_default_hosts()
{
    char hosts[8192];
    char *host;
    char *ncpu;
    smpd_host_node_t *cur_host, *iter;
#ifdef HAVE_WINDOWS_H
    DWORD len;
#else
    int dynamic = SMPD_FALSE;
    char myhostname[SMPD_MAX_HOST_LENGTH];
    int found;
#endif

    smpd_enter_fn(FCNAME);

    if (smpd_process.default_host_list != NULL && smpd_process.cur_default_host != NULL)
    {
	smpd_dbg_printf("default list already populated, returning success.\n");
	smpd_exit_fn(FCNAME);
	return SMPD_SUCCESS;
    }

    if (smpd_get_smpd_data("hosts", hosts, 8192) != SMPD_SUCCESS)
    {
#ifdef HAVE_WINDOWS_H
	len = 8192;
	/*if (GetComputerName(hosts, &len))*/
	if (GetComputerNameEx(ComputerNameDnsFullyQualified, hosts, &len))
	{
	    smpd_process.default_host_list = (smpd_host_node_t*)MPIU_Malloc(sizeof(smpd_host_node_t));
	    if (smpd_process.default_host_list == NULL)
	    {
		smpd_exit_fn(FCNAME);
		return SMPD_FAIL;
	    }
	    strcpy(smpd_process.default_host_list->host, hosts);
	    smpd_process.default_host_list->alt_host[0] = '\0';
	    smpd_process.default_host_list->nproc = 1;
	    smpd_process.default_host_list->connected = SMPD_FALSE;
	    smpd_process.default_host_list->connect_cmd_tag = -1;
	    smpd_process.default_host_list->next = smpd_process.default_host_list;
	    smpd_process.default_host_list->left = NULL;
	    smpd_process.default_host_list->right = NULL;
	    smpd_process.cur_default_host = smpd_process.default_host_list;
	    smpd_exit_fn(FCNAME);
	    return SMPD_SUCCESS;
	}
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
#else
	if (smpd_option_on("no_dynamic_hosts"))
	{
	    if (smpd_get_hostname(myhostname, SMPD_MAX_HOST_LENGTH) == SMPD_SUCCESS)
	    {
		smpd_process.default_host_list = (smpd_host_node_t*)MPIU_Malloc(sizeof(smpd_host_node_t));
		if (smpd_process.default_host_list == NULL)
		{
		    smpd_exit_fn(FCNAME);
		    return SMPD_FAIL;
		}
		strcpy(smpd_process.default_host_list->host, myhostname);
		smpd_process.default_host_list->alt_host[0] = '\0';
		smpd_process.default_host_list->nproc = 1;
		smpd_process.default_host_list->connected = SMPD_FALSE;
		smpd_process.default_host_list->connect_cmd_tag = -1;
		smpd_process.default_host_list->next = smpd_process.default_host_list;
		smpd_process.default_host_list->left = NULL;
		smpd_process.default_host_list->right = NULL;
		smpd_process.cur_default_host = smpd_process.default_host_list;
		smpd_exit_fn(FCNAME);
		return SMPD_SUCCESS;
	    }
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}

	smpd_lock_smpd_data();
	if (smpd_get_smpd_data(SMPD_DYNAMIC_HOSTS_KEY, hosts, 8192) != SMPD_SUCCESS)
	{
	    smpd_unlock_smpd_data();
	    if (smpd_get_hostname(hosts, 8192) == 0)
	    {
		smpd_process.default_host_list = (smpd_host_node_t*)MPIU_Malloc(sizeof(smpd_host_node_t));
		if (smpd_process.default_host_list == NULL)
		{
		    smpd_exit_fn(FCNAME);
		    return SMPD_FAIL;
		}
		strcpy(smpd_process.default_host_list->host, hosts);
		smpd_process.default_host_list->alt_host[0] = '\0';
		smpd_process.default_host_list->nproc = 1;
		smpd_process.default_host_list->connected = SMPD_FALSE;
		smpd_process.default_host_list->connect_cmd_tag = -1;
		smpd_process.default_host_list->next = smpd_process.default_host_list;
		smpd_process.default_host_list->left = NULL;
		smpd_process.default_host_list->right = NULL;
		smpd_process.cur_default_host = smpd_process.default_host_list;
		/* add this host to the dynamic_hosts key */
		strcpy(myhostname, hosts);
		smpd_lock_smpd_data();
		hosts[0] = '\0';
		smpd_get_smpd_data(SMPD_DYNAMIC_HOSTS_KEY, hosts, 8192);
		if (strlen(hosts) > 0)
		{
		    /* FIXME this could overflow */
		    strcat(hosts, " ");
		    strcat(hosts, myhostname);
		}
		else
		{
		    strcpy(hosts, myhostname);
		}
		smpd_set_smpd_data(SMPD_DYNAMIC_HOSTS_KEY, hosts);
		smpd_unlock_smpd_data();
		smpd_exit_fn(FCNAME);
		return SMPD_SUCCESS;
	    }
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	smpd_unlock_smpd_data();
	if (smpd_get_hostname(myhostname, SMPD_MAX_HOST_LENGTH) != 0)
	{
	    dynamic = SMPD_FALSE;
	    myhostname[0] = '\0';
	}
	else
	{
	    dynamic = SMPD_TRUE;
	}
#endif
    }

    /* FIXME: Insert code here to parse a compressed host string */
    /* For now, just use a space separated list of host names */

    host = strtok(hosts, " \t\r\n");
    while (host)
    {
	cur_host = (smpd_host_node_t*)MPIU_Malloc(sizeof(smpd_host_node_t));
	if (cur_host != NULL)
	{
	    /*printf("default host: %s\n", host);*/
	    strcpy(cur_host->host, host);
	    cur_host->alt_host[0] = '\0';
	    cur_host->nproc = 1;
	    ncpu = strstr(cur_host->host, ":");
	    if (ncpu)
	    {
		*ncpu = '\0';
		ncpu++;
		cur_host->nproc = atoi(ncpu);
		if (cur_host->nproc < 1)
		    cur_host->nproc = 1;
	    }
	    cur_host->connected = SMPD_FALSE;
	    cur_host->connect_cmd_tag = -1;
	    cur_host->next = NULL;
	    cur_host->left = NULL;
	    cur_host->right = NULL;
	    if (smpd_process.default_host_list == NULL)
	    {
		smpd_process.default_host_list = cur_host;
	    }
	    else
	    {
		iter = smpd_process.default_host_list;
		while (iter->next)
		    iter = iter->next;
		iter->next = cur_host;
	    }
	}
	host = strtok(NULL, " \t\r\n");
    }
    if (smpd_process.default_host_list)
    {
#ifndef HAVE_WINDOWS_H
	if (dynamic)
	{
	    found = SMPD_FALSE;
	    iter = smpd_process.default_host_list;
	    while (iter)
	    {
		if (strcmp(iter->host, myhostname) == 0)
		{
		    found = SMPD_TRUE;
		    break;
		}
		iter = iter->next;
	    }
	    if (!found)
	    {
		/* add this host to the dynamic_hosts key */
		smpd_lock_smpd_data();
		hosts[0] = '\0';
		smpd_get_smpd_data(SMPD_DYNAMIC_HOSTS_KEY, hosts, 8192);
		if (strlen(hosts) > 0)
		{
		    /* FIXME this could overflow */
		    strcat(hosts, " ");
		    strcat(hosts, myhostname);
		}
		else
		{
		    strcpy(hosts, myhostname);
		}
		smpd_set_smpd_data(SMPD_DYNAMIC_HOSTS_KEY, hosts);
		smpd_unlock_smpd_data();
	    }
	}
#endif
	/* make the default list into a ring */
	iter = smpd_process.default_host_list;
	while (iter->next)
	    iter = iter->next;
	iter->next = smpd_process.default_host_list;
	/* point the cur_default_host to the first node in the ring */
	smpd_process.cur_default_host = smpd_process.default_host_list;
    }

    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #19
0
int smpd_post_abort_command(char *fmt, ...)
{
    int result;
    char error_str[2048] = "";
    smpd_command_t *cmd_ptr;
    smpd_context_t *context;
    va_list list;

    smpd_enter_fn(FCNAME);

    va_start(list, fmt);
    vsnprintf(error_str, 2048, fmt, list);
    va_end(list);

    result = smpd_create_command("abort", smpd_process.id, 0, SMPD_FALSE, &cmd_ptr);
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("unable to create an abort command.\n");
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }
    result = smpd_add_command_arg(cmd_ptr, "error", error_str);
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("Unable to add the error string to the abort command.\n");
	smpd_exit_fn(FCNAME);
	return SMPD_FAIL;
    }
    result = smpd_command_destination(0, &context);
    if(result != SMPD_SUCCESS){
        smpd_err_printf("Unable to find destination for command...Aborting: %s\n", error_str);
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }

    if (context == NULL)
    {
	if (smpd_process.left_context == NULL)
	{
	    printf("Aborting: %s\n", error_str);
	    fflush(stdout);
	    smpd_exit_fn(FCNAME);
	    smpd_exit(-1);
	}

	smpd_process.closing = SMPD_TRUE;
	result = smpd_create_command("close", 0, 1, SMPD_FALSE, &cmd_ptr);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("unable to create the close command to tear down the job tree.\n");
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	result = smpd_post_write_command(smpd_process.left_context, cmd_ptr);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("unable to post a write of the close command to tear down the job tree as part of the abort process.\n");
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
    }
    else
    {
	smpd_dbg_printf("sending abort command to %s context: \"%s\"\n", smpd_get_context_str(context), cmd_ptr->cmd);
	result = smpd_post_write_command(context, cmd_ptr);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("unable to post a write of the abort command to the %s context.\n", smpd_get_context_str(context));
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
    }
    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #20
0
/*
 * Build the list of smpd Service Principal Names found in Active Directory.
 *
 * Allocates the list handle in *spn_list_hnd_p, searches the Global Catalog
 * for objects whose keywords match SMPD_SERVICE_VENDOR_GUIDW, binds to every
 * object found and stores one smpd_host_spn_node_t per object that could be
 * bound (most recently found entry first).
 *
 * Returns SMPD_SUCCESS or SMPD_FAIL.  Once the handle has been allocated,
 * whatever entries were collected before a failure are still stored in it.
 */
int smpd_spn_list_init(smpd_spn_list_hnd_t *spn_list_hnd_p)
{
    HRESULT hr;
    int result = SMPD_SUCCESS;
    IDirectoryObject *pSCP = NULL;
    ADS_ATTR_INFO *pPropEntries = NULL;
    IDirectorySearch *pSearch = NULL;
    ADS_SEARCH_HANDLE hSearch = NULL;
    LPWSTR pszDN = NULL;           /* distinguished name of SCP. */
    LPWSTR pszServiceDNSName;      /* service DNS name. */
    LPWSTR pszClass;               /* name of service class. */
    USHORT usPort;                 /* service port (parsed, currently not used). */
    WCHAR pszSearchString[SMPD_MAX_NAME_LENGTH];
    char service_class[SMPD_MAX_NAME_LENGTH];
    smpd_host_spn_node_t *iter;
    smpd_host_spn_node_t *spn_list_head=NULL;

    smpd_enter_fn(FCNAME);
    if(spn_list_hnd_p == NULL){
        smpd_err_printf("Invalid pointer to SPN list handle\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }

    spn_list_head = NULL;

    /* Allocate memory for spn list handle contents. Currently the handle just has the head
     * ptr to the list 
     */
    *spn_list_hnd_p = (smpd_host_spn_node_t **) MPIU_Malloc(sizeof(smpd_host_spn_node_t *));
    if(*spn_list_hnd_p == NULL){
        smpd_err_printf("Unable to allocate memory for list handle\n");
        smpd_exit_fn(FCNAME);
        return SMPD_FAIL;
    }

    **spn_list_hnd_p = NULL;

    CoInitialize(NULL);

    /* Get an IDirectorySearch pointer for the Global Catalog.  */
    hr = GetGCSearch(&pSearch);
    if (FAILED(hr) || pSearch == NULL) {
        smpd_err_printf("GetGC failed 0x%x\n", hr);
        result = SMPD_FAIL;
        goto Cleanup;
    }

    /* Set up a deep search.
      Thousands of objects are not expected, therefore
      query for 1000 rows per page.*/
    ADS_SEARCHPREF_INFO SearchPref[2];
    DWORD dwPref = sizeof(SearchPref)/sizeof(ADS_SEARCHPREF_INFO);
    SearchPref[0].dwSearchPref =    ADS_SEARCHPREF_SEARCH_SCOPE;
    SearchPref[0].vValue.dwType =   ADSTYPE_INTEGER;
    SearchPref[0].vValue.Integer =  ADS_SCOPE_SUBTREE;

    SearchPref[1].dwSearchPref =    ADS_SEARCHPREF_PAGESIZE;
    SearchPref[1].vValue.dwType =   ADSTYPE_INTEGER;
    SearchPref[1].vValue.Integer =  1000;

    hr = pSearch->SetSearchPreference(SearchPref, dwPref);
    if (FAILED(hr)){
        smpd_err_printf("Failed to set search prefs: hr:0x%x\n", hr);
        result = SMPD_FAIL;
        goto Cleanup;
    }

    /* Execute the search. From the GC get the distinguished name 
      of the SCP. Use the DN to bind to the SCP and get the other 
      properties. */
    LPWSTR rgszDN[] = {L"distinguishedName"};

    /* Search for a match of the product GUID. */
    swprintf(pszSearchString, L"(keywords=%s)", SMPD_SERVICE_VENDOR_GUIDW);
    hr = pSearch->ExecuteSearch(pszSearchString, rgszDN, 1, &hSearch);
    /*hr = pSearch->ExecuteSearch(L"keywords=5722fe5f-cf46-4594-af7c-0997ca2e9d72", rgszDN, 1, &hSearch);*/
    if (FAILED(hr)){
        smpd_err_printf("ExecuteSearch failed: hr:0x%x\n", hr);
        result = SMPD_FAIL;
        goto Cleanup;
    }

    /* Loop through the results. Each row should be an instance of the 
      service identified by the product GUID.
      Add logic to select from multiple service instances. */
    while (SUCCEEDED(hr = pSearch->GetNextRow(hSearch))){
        DWORD dwError = ERROR_SUCCESS;
        WCHAR szError[512];
        WCHAR szProvider[512];

        if (hr == S_ADS_NOMORE_ROWS){
            ADsGetLastError(&dwError, szError, 512, szProvider, 512);
            if (ERROR_MORE_DATA == dwError){
                continue;
            }
            smpd_dbg_printf("No more result rows from GC\n");
            result = SMPD_SUCCESS;
            goto Cleanup;
        }

        ADS_SEARCH_COLUMN Col;

        hr = pSearch->GetColumn(hSearch, L"distinguishedName", &Col);
        if(FAILED(hr)){
            smpd_err_printf("Failed to get Distinguished Name for SPN\n");
            result = SMPD_FAIL;
            goto Cleanup;
        }
        pszDN = AllocADsStr(Col.pADsValues->CaseIgnoreString);
        if(pszDN == NULL){
            ADsGetLastError(&dwError, szError, 512, szProvider, 512);
            smpd_err_printf("Failed to allocate memory for ADs string, 0x%x\n", dwError);
            /* the column is still held at this point; release it before leaving */
            pSearch->FreeColumn(&Col);
            result = SMPD_FAIL;
            goto Cleanup;
        }
        pSearch->FreeColumn(&Col);

        /* Bind to the DN to get the other properties. */
        LPWSTR lpszLDAPPrefix = L"LDAP://";
        DWORD dwSCPPathLength = (DWORD)(wcslen(lpszLDAPPrefix) + wcslen(pszDN) + 1);
        LPWSTR pwszSCPPath = (LPWSTR)malloc(sizeof(WCHAR) * dwSCPPathLength);
        if (pwszSCPPath){
            wcscpy(pwszSCPPath, lpszLDAPPrefix);
            wcscat(pwszSCPPath, pszDN);
        }
        else{
            smpd_err_printf("Failed to allocate a buffer\n");
            result = SMPD_FAIL;
            goto Cleanup; /* pszDN is released at Cleanup */
        }
        /*wprintf(L"pszDN = %s\n", pszDN);*/

        hr = ADsGetObject(pwszSCPPath, IID_IDirectoryObject, (void**)&pSCP);
        free(pwszSCPPath);

        if (SUCCEEDED(hr)) {
            /* Properties to retrieve from the SCP object. */
            LPWSTR rgszAttribs[]=
                {
                    {L"serviceClassName"},
                    {L"serviceDNSName"},
                    /*{L"serviceDNSNameType"},*/
                    {L"serviceBindingInformation"}
                };

            DWORD dwAttrs = sizeof(rgszAttribs)/sizeof(LPWSTR);
            DWORD dwNumAttrGot = 0;
            hr = pSCP->GetObjectAttributes(rgszAttribs, dwAttrs, &pPropEntries, &dwNumAttrGot);
            if (FAILED(hr)){
                smpd_err_printf("GetObjectAttributes Failed. hr:0x%x\n", hr);
                result = SMPD_FAIL;
                goto Cleanup;
            }

            pszServiceDNSName = NULL;
            pszClass = NULL;
            iter = (smpd_host_spn_node_t*)malloc(sizeof(smpd_host_spn_node_t));
            if (iter == NULL){
                smpd_err_printf("Unable to allocate memory to store an SPN entry.\n");
                result = SMPD_FAIL;
                goto Cleanup;
            }
            iter->next = NULL;
            iter->host[0] = '\0';
            iter->dnshost[0] = '\0';
            iter->spn[0] = '\0';

            /* Loop through the entries returned by GetObjectAttributes 
             * and save the values in the appropriate buffers.
             * Only dwNumAttrGot entries exist; that can be fewer than the
             * dwAttrs names that were requested.
             */
            for (int i = 0; i < (LONG)dwNumAttrGot; i++){
                if ((wcscmp(L"serviceDNSName", pPropEntries[i].pszAttrName) == 0) &&
                        (pPropEntries[i].dwADsType == ADSTYPE_CASE_IGNORE_STRING)){
                    pszServiceDNSName = AllocADsStr(pPropEntries[i].pADsValues->CaseIgnoreString);
                    /*wprintf(L"pszServiceDNSName = %s\n", pszServiceDNSName);*/
                }
                if ((wcscmp(L"serviceClassName", pPropEntries[i].pszAttrName) == 0) &&
                        (pPropEntries[i].dwADsType == ADSTYPE_CASE_IGNORE_STRING)){
                    pszClass = AllocADsStr(pPropEntries[i].pADsValues->CaseIgnoreString);
                    /*wprintf(L"pszClass = %s\n", pszClass);*/
                }
                if ((wcscmp(L"serviceBindingInformation", pPropEntries[i].pszAttrName) == 0) &&
                    (pPropEntries[i].dwADsType == ADSTYPE_CASE_IGNORE_STRING)){
                    usPort=(USHORT)_wtoi(pPropEntries[i].pADsValues->CaseIgnoreString);
                    /*wprintf(L"usPort = %d\n", usPort);*/
                }
            } /* for(;;) */

            /* wcstombs does not write a terminator when the converted text
             * fills the whole buffer, so terminate explicitly. */
            if(pszServiceDNSName != NULL){
                wcstombs(iter->dnshost, pszServiceDNSName, SMPD_MAX_NAME_LENGTH);
                iter->dnshost[SMPD_MAX_NAME_LENGTH - 1] = '\0';
            }
            /* the class attribute may be missing; use an empty class then */
            service_class[0] = '\0';
            if(pszClass != NULL){
                wcstombs(service_class, pszClass, SMPD_MAX_NAME_LENGTH);
                service_class[SMPD_MAX_NAME_LENGTH - 1] = '\0';
            }
            /*MPIU_Snprintf(iter->spn, SMPD_MAX_NAME_LENGTH, "%s/%s:%d", temp_str, iter->dnshost, usPort);*/
            wcstombs(iter->fq_service_name, pszDN, SMPD_MAX_FQ_NAME_LENGTH);
            iter->fq_service_name[SMPD_MAX_FQ_NAME_LENGTH - 1] = '\0';
            /* MPIU_Snprintf(iter->spn, SMPD_MAX_NAME_LENGTH, "%s/%s/%s", temp_str, iter->dnshost, temp_str2); */
            SMPD_INIT_SPN(iter->spn, SMPD_MAX_FQ_NAME_LENGTH, service_class, iter->dnshost, iter->fq_service_name);

            /* push the new entry on the front of the list */
            iter->next = spn_list_head;
            spn_list_head = iter;
            if (pszServiceDNSName != NULL){
                FreeADsStr(pszServiceDNSName);
            }
            if (pszClass != NULL){
                FreeADsStr(pszClass);
            }

            /* Done with this object: release it and its attributes now so
             * the next row does not overwrite (and leak) them. */
            if (pPropEntries){
                FreeADsMem(pPropEntries);
                pPropEntries = NULL;
            }
            pSCP->Release();
            pSCP = NULL;
        }
        FreeADsStr(pszDN);
        pszDN = NULL;
    } /* GetNextRow() */

    /* The loop is only left this way when GetNextRow() itself failed; the
     * "no more rows" case jumps straight to Cleanup.  The return value is
     * left unchanged so callers keep receiving the rows collected so far. */
    smpd_err_printf("GetNextRow failed: hr:0x%x\n", hr);

Cleanup:

    **spn_list_hnd_p = spn_list_head;
    smpd_spn_list_dbg_print(*spn_list_hnd_p);

    /* non-NULL only when a row was abandoned part way through */
    if (pszDN){
        FreeADsStr(pszDN);
        pszDN = NULL;
    }

    if (pSCP){
        pSCP->Release();
        pSCP = NULL;
    }

    if (pPropEntries){
        FreeADsMem(pPropEntries);
        pPropEntries = NULL;
    }

    if (pSearch){
        if (hSearch){
            pSearch->CloseSearchHandle(hSearch);
            hSearch = NULL;
        }

        pSearch->Release();
        pSearch = NULL;
    }
    
    CoUninitialize();

    smpd_exit_fn(FCNAME);
    return result;
}
Пример #21
0
/* FIXME: Why is this func defined here ? 
 * - shouldn't this be in smpd_util*.lib ?
 */
static int root_smpd(void *p)
{
    int result;
    SMPDU_Sock_set_t set;
    SMPDU_Sock_t listener;
    mpiexec_rsh_rsmpd_args_t *pArgs = NULL;
    smpd_process_group_t *pg;
    int i, rootPort;
#ifndef HAVE_WINDOWS_H
    int send_kvs = 0;
    int pipe_fd;
#endif


    pArgs = (mpiexec_rsh_rsmpd_args_t *)p;
    if(!pArgs){
        smpd_err_printf("Invalid args - NULL - to root smpd\n");
        return SMPD_FAIL;
    }
    smpd_process.id = 1;
    smpd_process.root_smpd = SMPD_FALSE;
    smpd_process.map0to1 = SMPD_TRUE;

    result = SMPDU_Sock_create_set(&set);
    if (result != SMPD_SUCCESS){
	    smpd_err_printf(result, "SMPDU_Sock_create_set failed.\n");
	    return SMPD_FAIL;
    }
    smpd_process.set = set;
    smpd_dbg_printf("created a set for the listener: %d\n", SMPDU_Sock_get_sock_set_id(set));
    result = SMPDU_Sock_listen(set, NULL, &rootPort, &listener); 
    if (result != SMPD_SUCCESS){
	    smpd_err_printf(result, "SMPDU_Sock_listen failed.\n");
	    return SMPD_FAIL;
    }
    smpd_dbg_printf("smpd listening on port %d\n", rootPort);

    result = smpd_create_context(SMPD_CONTEXT_LISTENER, set, listener, -1, &smpd_process.listener_context);
    if (result != SMPD_SUCCESS){
	    smpd_err_printf("unable to create a context for the smpd listener.\n");
	    return SMPD_FAIL;
    }

    result = SMPDU_Sock_set_user_ptr(listener, smpd_process.listener_context);
    if (result != SMPD_SUCCESS){
	    smpd_err_printf(result, "SMPDU_Sock_set_user_ptr failed.\n");
	    return SMPD_FAIL;
    }
    smpd_process.listener_context->state = SMPD_SMPD_LISTENING;

    smpd_dbs_init();
    smpd_process.have_dbs = SMPD_TRUE;
    if (smpd_process.kvs_name[0] != '\0'){
	    result = smpd_dbs_create_name_in(smpd_process.kvs_name);
    }
    else{
	    result = smpd_dbs_create(smpd_process.kvs_name);
#ifndef HAVE_WINDOWS_H
	    send_kvs = 1;
#endif
    }
    if (result != SMPD_DBS_SUCCESS){
	    smpd_err_printf("unable to create a kvs database: name = <%s>.\n", smpd_process.kvs_name);
	    return SMPD_FAIL;
    }

    /* Set up the process group */
    /* initialize a new process group structure */
    pg = (smpd_process_group_t*)MPIU_Malloc(sizeof(smpd_process_group_t));
    if (pg == NULL){
	    smpd_err_printf("unable to allocate memory for a process group structure.\n");
	    return SMPD_FAIL;
    }

    pg->aborted = SMPD_FALSE;
    pg->any_init_received = SMPD_FALSE;
    pg->any_noinit_process_exited = SMPD_FALSE;
    strncpy(pg->kvs, smpd_process.kvs_name, SMPD_MAX_DBS_NAME_LEN);
    pg->num_procs = smpd_process.nproc;
    pg->processes = (smpd_exit_process_t*)MPIU_Malloc(smpd_process.nproc * sizeof(smpd_exit_process_t));
    if (pg->processes == NULL){
	    smpd_err_printf("unable to allocate an array of %d process exit structures.\n", smpd_process.nproc);
	    return SMPD_FAIL;
    }
    for (i=0; i<smpd_process.nproc; i++){
	    pg->processes[i].ctx_key[0] = '\0';
	    pg->processes[i].errmsg = NULL;
	    pg->processes[i].exitcode = -1;
	    pg->processes[i].exited = SMPD_FALSE;
	    pg->processes[i].finalize_called = SMPD_FALSE;
	    pg->processes[i].init_called = SMPD_FALSE;
	    pg->processes[i].node_id = i+1;
	    pg->processes[i].host[0] = '\0';
	    pg->processes[i].suspended = SMPD_FALSE;
	    pg->processes[i].suspend_cmd = NULL;
    }
    /* add the process group to the global list */
    pg->next = smpd_process.pg_list;
    smpd_process.pg_list = pg;

#ifdef HAVE_WINDOWS_H
    *(pArgs->pRootPort) = rootPort;
    SetEvent(pArgs->hRootSMPDRdyEvent);
#else
    if (p != NULL){
	    /*pipe_fd = *(int*)p;*/
        pipe_fd = pArgs->pipe_fd;
	    /* send the root port back over the pipe */
	    writebuf(pipe_fd, &rootPort, sizeof(int));
	    if (send_kvs){
	        writebuf(pipe_fd, smpd_process.kvs_name, SMPD_MAX_DBS_NAME_LEN);
	    }  
	    close(pipe_fd);
    }
#endif

    result = smpd_enter_at_state(set, SMPD_SMPD_LISTENING);
    if (result != SMPD_SUCCESS){
	    smpd_err_printf("root_smpd state machine failed.\n");
	    return SMPD_FAIL;
    }

    result = SMPDU_Sock_destroy_set(set);
    if (result != SMPD_SUCCESS){
	    smpd_err_printf("unable to destroy the set (result = %d)\n", result);
    }

    return SMPD_SUCCESS;
}
Пример #22
0
int smpd_parse_command_args(int *argcp, char **argvp[])
{
    int result = 0;
#ifdef HAVE_WINDOWS_H
    char str[20], read_handle_str[20], write_handle_str[20];
    int port;
    SMPDU_Sock_t listener;
    SMPDU_Sock_set_t set;
    HANDLE hWrite, hRead;
    DWORD num_written, num_read;
#endif
    int dbg_flag;
    char domain[SMPD_MAX_HOST_LENGTH];
    char opt[SMPD_MAX_NAME_LENGTH];
    char opt_val[SMPD_MAX_VALUE_LENGTH];
    char filename[SMPD_MAX_FILENAME];
    int i;

    smpd_enter_fn(FCNAME);

    /* check for help option */
    if (
#ifndef HAVE_WINDOWS_H
	*argcp < 2 || /* unix: print the options if no arguments are supplied */
#endif
	smpd_get_opt(argcp, argvp, "-help") || smpd_get_opt(argcp, argvp, "-?"))
    {
	smpd_print_options();
	smpd_exit(0);
    }

    /* check for the printprocs option */
    if (smpd_get_opt(argcp, argvp, "-printprocs"))
    {
	smpd_watch_processes();
	smpd_exit(0);
    }

    if (smpd_get_opt(argcp, argvp, "-hosts"))
    {
	char first_host[SMPD_MAX_HOST_LENGTH], host[SMPD_MAX_HOST_LENGTH], alt_host[SMPD_MAX_HOST_LENGTH];

	smpd_get_default_hosts();

	result = smpd_get_next_hostname(first_host, alt_host);
	if (result != SMPD_SUCCESS)
	    smpd_exit(result);
	printf("%s\n", first_host);
	result = smpd_get_next_hostname(host, alt_host);
	if (result != SMPD_SUCCESS)
	    smpd_exit(result);
	while (strcmp(host, first_host) != 0)
	{
	    printf("%s\n", host);
	    result = smpd_get_next_hostname(host, alt_host);
	    if (result != SMPD_SUCCESS)
		smpd_exit(result);
	}
	smpd_exit(0);
    }

    if (smpd_get_opt(argcp, argvp, "-sethosts"))
    {
	char *buffer, *iter;
	int i, length;

	length = (*argcp) * SMPD_MAX_HOST_LENGTH;
	buffer = MPIU_Malloc(length);
	if (buffer == NULL)
	{
	    smpd_err_printf("unable to allocate memory to store the host names.\n");
	    smpd_exit(-1);
	}
	iter = buffer;
	for (i=1; i<*argcp; i++)
	{
	    result = MPIU_Str_add_string(&iter, &length, (*argvp)[i]);
	    if (result)
	    {
		printf("unable to add host #%d, %s\n", i, (*argvp)[i]);
		MPIU_Free(buffer);
		smpd_exit(-1);
	    }
	}
	/*printf("hosts: %s\n", buffer);*/
	result = smpd_set_smpd_data("hosts", buffer);
	if (result == SMPD_SUCCESS)
	{
	    printf("hosts data saved successfully.\n");
	}
	else
	{
	    printf("Error: unable to save the hosts data.\n");
	}
	MPIU_Free(buffer);
	smpd_exit(0);
    }

    if (smpd_get_opt_two_strings(argcp, argvp, "-set", opt, SMPD_MAX_NAME_LENGTH, opt_val, SMPD_MAX_VALUE_LENGTH))
    {
	/* The do loop allows for multiple -set operations to be specified on the command line */
	do
	{
	    if (strlen(opt) == 0)
	    {
		printf("invalid option specified.\n");
		smpd_exit(-1);
	    }
	    if (strlen(opt_val) == 0)
	    {
		result = smpd_delete_smpd_data(opt);
	    }
	    else
	    {
		result = smpd_set_smpd_data(opt, opt_val);
	    }
	    if (result == SMPD_SUCCESS)
	    {
		printf("%s = %s\n", opt, opt_val);
	    }
	    else
	    {
		printf("unable to set %s option.\n", opt);
	    }
	} while (smpd_get_opt_two_strings(argcp, argvp, "-set", opt, SMPD_MAX_NAME_LENGTH, opt_val, SMPD_MAX_VALUE_LENGTH));
	smpd_exit(0);
    }

    if (smpd_get_opt_string(argcp, argvp, "-get", opt, SMPD_MAX_NAME_LENGTH))
    {
	if (strlen(opt) == 0)
	{
	    printf("invalid option specified.\n");
	    smpd_exit(-1);
	}
	result = smpd_get_smpd_data(opt, opt_val, SMPD_MAX_VALUE_LENGTH);
	if (result == SMPD_SUCCESS)
	{
	    printf("%s\n", opt_val);
	}
	else
	{
	    printf("default\n");
	}
	smpd_exit(0);
    }

    /* If we've made it here and there still is "-set" or "-get" on the command line then the user
     * probably didn't supply the correct number of parameters.  So print the usage message
     * and exit.
     */
    if (smpd_get_opt(argcp, argvp, "-set") || smpd_get_opt(argcp, argvp, "-get"))
    {
	smpd_print_options();
	smpd_exit(-1);
    }

    if (smpd_get_opt(argcp, argvp, "-enumerate") || smpd_get_opt(argcp, argvp, "-enum"))
    {
	smpd_data_t *data;
	if (smpd_get_all_smpd_data(&data) == SMPD_SUCCESS)
	{
	    smpd_data_t *iter = data;
	    while (iter != NULL)
	    {
		printf("%s\n%s\n", iter->name, iter->value);
		iter = iter->next;
	    }
	    while (data != NULL)
	    {
		iter = data;
		data = data->next;
		MPIU_Free(iter);
	    }
	}
	smpd_exit(0);
    }

    if (smpd_get_opt_string(argcp, argvp, "-query", domain, SMPD_MAX_HOST_LENGTH))
    {
	printf("querying hosts in the %s domain:\n", domain);
	printf("Not implemented.\n");
	smpd_exit(0);
    }
    if (smpd_get_opt(argcp, argvp, "-query"))
    {
	printf("querying hosts in the default domain:\n");
	printf("Not implemented.\n");
	smpd_exit(0);
    }

    /* check for the service/silent option */
#ifdef HAVE_WINDOWS_H
    smpd_process.bService = SMPD_TRUE;
#endif

    if (smpd_get_opt(argcp, argvp, "-s"))
    {
#ifdef HAVE_WINDOWS_H
	printf("The -s option is only available under unix.\n");
	smpd_print_options();
	smpd_exit(0);
#else
	smpd_process.bNoTTY = SMPD_TRUE;
#endif
    }

    if (smpd_get_opt(argcp, argvp, "-r"))
    {
#ifdef HAVE_WINDOWS_H
	printf("The -r option is only available under unix.\n");
	smpd_print_options();
#else
	printf("The -r root option is not yet implemented.\n");
#endif
	smpd_exit(0);
    }

    /* check for debug option */
    if (smpd_get_opt_int(argcp, argvp, "-d", &dbg_flag))
    {
	smpd_process.dbg_state = dbg_flag;
	smpd_process.bNoTTY = SMPD_FALSE;
	smpd_process.bService = SMPD_FALSE;
    }
    if (smpd_get_opt(argcp, argvp, "-d"))
    {
	smpd_process.dbg_state = SMPD_DBG_STATE_ERROUT | SMPD_DBG_STATE_STDOUT | SMPD_DBG_STATE_PREPEND_RANK | SMPD_DBG_STATE_TRACE;
	smpd_process.bNoTTY = SMPD_FALSE;
	smpd_process.bService = SMPD_FALSE;
    }
    if (smpd_get_opt_int(argcp, argvp, "-debug", &dbg_flag))
    {
	smpd_process.dbg_state = dbg_flag;
	smpd_process.bNoTTY = SMPD_FALSE;
	smpd_process.bService = SMPD_FALSE;
    }
    if (smpd_get_opt(argcp, argvp, "-debug"))
    {
	smpd_process.dbg_state = SMPD_DBG_STATE_ERROUT | SMPD_DBG_STATE_STDOUT | SMPD_DBG_STATE_PREPEND_RANK | SMPD_DBG_STATE_TRACE;
	smpd_process.bNoTTY = SMPD_FALSE;
	smpd_process.bService = SMPD_FALSE;
    }

    /* check for port option */
    smpd_get_opt_int(argcp, argvp, "-p", &smpd_process.port);
    smpd_get_opt_int(argcp, argvp, "-port", &smpd_process.port);
    if (smpd_get_opt(argcp, argvp, "-anyport"))
    {
	smpd_process.port = 0;
	smpd_process.dbg_state = 0; /* turn of debugging or you won't be able to read the port from stdout */
	smpd_process.bNoTTY = SMPD_FALSE;
	smpd_process.bService = SMPD_FALSE;
    }

    smpd_process.noprompt = smpd_get_opt(argcp, argvp, "-noprompt");

#ifdef HAVE_WINDOWS_H

    /* check for service options */
    if (smpd_get_opt(argcp, argvp, "-remove") ||
	smpd_get_opt(argcp, argvp, "-unregserver") ||
	smpd_get_opt(argcp, argvp, "-uninstall") ||
	smpd_get_opt(argcp, argvp, "/Remove") ||
	smpd_get_opt(argcp, argvp, "/Uninstall"))
    {
	/*RegDeleteKey(HKEY_CURRENT_USER, MPICHKEY);*/
	smpd_remove_service(SMPD_TRUE);
	ExitProcess(0);
    }
    if (smpd_get_opt(argcp, argvp, "-install") ||
	smpd_get_opt(argcp, argvp, "-regserver") ||
	smpd_get_opt(argcp, argvp, "/Install") ||
	smpd_get_opt(argcp, argvp, "/install") ||
	smpd_get_opt(argcp, argvp, "/RegServer"))
    {
	char phrase[SMPD_PASSPHRASE_MAX_LENGTH]="", port_str[SMPD_MAX_PORT_STR_LENGTH]="";

	if (smpd_remove_service(SMPD_FALSE) == SMPD_FALSE)
	{
	    printf("Unable to remove the previous installation, install failed.\n");
	    ExitProcess(0);
	}

	if (smpd_get_opt_string(argcp, argvp, "-phrase", phrase, SMPD_PASSPHRASE_MAX_LENGTH) ||
	    smpd_get_win_opt_string(argcp, argvp, "/phrase", phrase, SMPD_PASSPHRASE_MAX_LENGTH))
	{
	    smpd_set_smpd_data("phrase", phrase);
	}
	if (smpd_get_opt(argcp, argvp, "-getphrase"))
	{
	    printf("passphrase for smpd: ");fflush(stdout);
	    smpd_get_password(phrase);
	    smpd_set_smpd_data("phrase", phrase);
	}
	if (smpd_process.port != SMPD_LISTENER_PORT)
	{
        snprintf(port_str, SMPD_MAX_PORT_STR_LENGTH, "%d", smpd_process.port);
	    smpd_set_smpd_data("port", port_str);
	}

	smpd_install_service(SMPD_FALSE, SMPD_TRUE, smpd_get_opt(argcp, argvp, "-delegation"));
	smpd_set_smpd_data("version", SMPD_VERSION);
	ExitProcess(0);
    }
    if (smpd_get_opt(argcp, argvp, "-start"))
    {
	smpd_start_service();
	ExitProcess(0);
    }
    if (smpd_get_opt(argcp, argvp, "-stop"))
    {
	smpd_stop_service();
	ExitProcess(0);
    }
    if (smpd_get_opt(argcp, argvp, "-register_spn"))
    {
        char filename[SMPD_MAX_FILENAME];

        if(smpd_get_opt_string(argcp, argvp, "-f", filename, SMPD_MAX_FILENAME)){
            result = smpd_setup_scps_with_file(filename);
            if(result != SMPD_SUCCESS){
                printf("Failed to register smpd's Service Principal Names (at least one failed) with Domain Controller\n");
                ExitProcess((UINT )-1);
            }
        }
        else{
            result = smpd_setup_scp(NULL);
            if(result != SMPD_SUCCESS){
                printf("Failed to register smpd's Service Principal Name with Domain Controller\n");
                ExitProcess((UINT )-1);
            }
        }
        printf("Service Principal Name registered with the domain controller.\n");
        printf("SMPD is now capable of launching processes using passwordless delegation.\n");
        printf("The system administrator must ensure the following:\n");
        printf(" 1) This host is trusted for delegation in Active Directory\n");
        printf(" 2) All users who will run jobs are trusted for delegation.\n");
        printf("Domain administrators can enable these options for hosts and users\nin Active Directory on the domain controller.\n");
        ExitProcess(0);
    }
    if (smpd_get_opt(argcp, argvp, "-remove_spn"))
    {
        char filename[SMPD_MAX_FILENAME];
        smpd_spn_list_hnd_t hnd;

        result = smpd_spn_list_init(&hnd);
        if(result != SMPD_SUCCESS){
            printf("Unable to initialize SPN list\n");
            ExitProcess((UINT ) -1);
        }

        if(smpd_get_opt_string(argcp, argvp, "-f", filename, SMPD_MAX_FILENAME)){
            result = smpd_remove_scps_with_file(filename, hnd);
            if(result != SMPD_SUCCESS){
                printf("Failed to remove smpd's Service Principal Names (at least one failed) with Domain Controller\n");
                ExitProcess((UINT )-1);
            }
            printf("Removed smpd's Service Principal Names successfully\n");
        }
        else{
            result = smpd_remove_scp(NULL, hnd);
            if(result != SMPD_SUCCESS){
                printf("Failed to remove smpd's Service Principal Name with Domain Controller\n");
                ExitProcess((UINT )-1);
            }
            printf("Removed smpd's Service Principal Name successfully\n");
        }
        smpd_spn_list_finalize(&hnd);
        ExitProcess(0);
    }

    if (smpd_get_opt(argcp, argvp, "-mgr"))
    {
	/* Set a ctrl-handler to kill child processes if this smpd is killed */
	if (!SetConsoleCtrlHandler(smpd_ctrl_handler, TRUE))
	{
	    result = GetLastError();
	    smpd_dbg_printf("unable to set the ctrl handler for the smpd manager, error %d.\n", result);
	}
#ifdef HAVE_WINDOWS_H
    {
        BOOL ret;
        ret = smpd_init_affinity_table();
        if(!ret){
            smpd_dbg_printf("Initializing smpd affinity table failed\n");
        }
    }
#endif

	smpd_process.bService = SMPD_FALSE;
	if (!smpd_get_opt_string(argcp, argvp, "-read", read_handle_str, 20))
	{
	    smpd_err_printf("manager started without a read pipe handle.\n");
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	if (!smpd_get_opt_string(argcp, argvp, "-write", write_handle_str, 20))
	{
	    smpd_err_printf("manager started without a write pipe handle.\n");
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	hRead = smpd_decode_handle(read_handle_str);
	hWrite = smpd_decode_handle(write_handle_str);

	smpd_dbg_printf("manager creating listener and session sets.\n");

	result = SMPDU_Sock_create_set(&set);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("SMPDU_Sock_create_set(listener) failed,\nsock error: %s\n", get_sock_error_string(result));
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	smpd_process.set = set;
	smpd_dbg_printf("created set for manager listener, %d\n", SMPDU_Sock_get_sock_set_id(set));
	port = 0;
	result = SMPDU_Sock_listen(set, NULL, &port, &listener); 
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("SMPDU_Sock_listen failed,\nsock error: %s\n", get_sock_error_string(result));
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	smpd_dbg_printf("smpd manager listening on port %d\n", port);

	result = smpd_create_context(SMPD_CONTEXT_LISTENER, set, listener, -1, &smpd_process.listener_context);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("unable to create a context for the smpd listener.\n");
	    smpd_exit_fn(FCNAME);
	    return result;
	}
	result = SMPDU_Sock_set_user_ptr(listener, smpd_process.listener_context);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("SMPDU_Sock_set_user_ptr failed,\nsock error: %s\n", get_sock_error_string(result));
	    smpd_exit_fn(FCNAME);
	    return result;
	}
	smpd_process.listener_context->state = SMPD_MGR_LISTENING;

	memset(str, 0, 20);
	snprintf(str, 20, "%d", port);
	smpd_dbg_printf("manager writing port back to smpd.\n");
	if (!WriteFile(hWrite, str, 20, &num_written, NULL))
	{
	    smpd_err_printf("WriteFile failed, error %d\n", GetLastError());
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	CloseHandle(hWrite);
	if (num_written != 20)
	{
	    smpd_err_printf("wrote only %d bytes of 20\n", num_written);
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	smpd_dbg_printf("manager reading account and password from smpd.\n");
	if (!ReadFile(hRead, smpd_process.UserAccount, SMPD_MAX_ACCOUNT_LENGTH, &num_read, NULL))
	{
	    smpd_err_printf("ReadFile failed, error %d\n", GetLastError());
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	if (num_read != SMPD_MAX_ACCOUNT_LENGTH)
	{
	    smpd_err_printf("read only %d bytes of %d\n", num_read, SMPD_MAX_ACCOUNT_LENGTH);
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	if (!ReadFile(hRead, smpd_process.UserPassword, SMPD_MAX_PASSWORD_LENGTH, &num_read, NULL))
	{
	    smpd_err_printf("ReadFile failed, error %d\n", GetLastError());
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	if (num_read != SMPD_MAX_PASSWORD_LENGTH)
	{
	    smpd_err_printf("read only %d bytes of %d\n", num_read, SMPD_MAX_PASSWORD_LENGTH);
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	if (!ReadFile(hRead, smpd_process.passphrase, SMPD_PASSPHRASE_MAX_LENGTH, &num_read, NULL))
	{
	    smpd_err_printf("ReadFile failed, error %d\n", GetLastError());
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	if (num_read != SMPD_PASSPHRASE_MAX_LENGTH)
	{
	    smpd_err_printf("read only %d bytes of %d\n", num_read, SMPD_PASSPHRASE_MAX_LENGTH);
	    smpd_exit_fn(FCNAME);
	    return SMPD_FAIL;
	}
	smpd_process.credentials_prompt = SMPD_FALSE;

	result = smpd_enter_at_state(set, SMPD_MGR_LISTENING);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("state machine failed.\n");
	}

	/*
	result = SMPDU_Sock_finalize();
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("SMPDU_Sock_finalize failed,\nsock error: %s\n", get_sock_error_string(result));
	}
	*/
	smpd_exit(0);
	smpd_exit_fn(FCNAME);
	ExitProcess(0);
    }
#endif

    /* check for the status option */
    if (smpd_get_opt_string(argcp, argvp, "-status", smpd_process.console_host, SMPD_MAX_HOST_LENGTH))
    {
	smpd_process.do_console = 1;
	smpd_process.builtin_cmd = SMPD_CMD_DO_STATUS;
    }
    else if (smpd_get_opt(argcp, argvp, "-status"))
    {
	smpd_get_hostname(smpd_process.console_host, SMPD_MAX_HOST_LENGTH);
	smpd_process.do_console = 1;
	smpd_process.builtin_cmd = SMPD_CMD_DO_STATUS;
    }

    /* check for console options */
    if (smpd_get_opt_string(argcp, argvp, "-console", smpd_process.console_host, SMPD_MAX_HOST_LENGTH))
    {
	smpd_process.do_console = 1;
    }
    else if (smpd_get_opt(argcp, argvp, "-console"))
    {
	smpd_get_hostname(smpd_process.console_host, SMPD_MAX_HOST_LENGTH);
	smpd_process.do_console = 1;
    }
    if (smpd_process.do_console)
    {
	/* This may need to be changed to avoid conflict */
	if (smpd_get_opt(argcp, argvp, "-p"))
	{
	    smpd_process.use_process_session = 1;
	}
    }

    if (smpd_get_opt_string(argcp, argvp, "-shutdown", smpd_process.console_host, SMPD_MAX_HOST_LENGTH))
    {
	smpd_process.do_console = 1;
	smpd_process.builtin_cmd = SMPD_CMD_SHUTDOWN;
    }
    else if (smpd_get_opt(argcp, argvp, "-shutdown"))
    {
	smpd_get_hostname(smpd_process.console_host, SMPD_MAX_HOST_LENGTH);
	smpd_process.do_console = 1;
	smpd_process.builtin_cmd = SMPD_CMD_SHUTDOWN;
    }

    if (smpd_get_opt_string(argcp, argvp, "-restart", smpd_process.console_host, SMPD_MAX_HOST_LENGTH))
    {
	smpd_process.do_console = 1;
	smpd_process.builtin_cmd = SMPD_CMD_RESTART;
    }
    else if (smpd_get_opt(argcp, argvp, "-restart"))
    {
#ifdef HAVE_WINDOWS_H
	printf("restarting the smpd service...\n");
	smpd_stop_service();
	Sleep(1000);
	smpd_start_service();
	smpd_exit(0);
#else
	smpd_get_hostname(smpd_process.console_host, SMPD_MAX_HOST_LENGTH);
	smpd_process.do_console = 1;
	smpd_process.builtin_cmd = SMPD_CMD_RESTART;
#endif
    }

    if (smpd_get_opt_string(argcp, argvp, "-version", smpd_process.console_host, SMPD_MAX_HOST_LENGTH))
    {
	smpd_process.do_console = 1;
	smpd_process.builtin_cmd = SMPD_CMD_VERSION;
    }
    else if (smpd_get_opt(argcp, argvp, "-version"))
    {
	printf("%s\n", SMPD_VERSION);
	fflush(stdout);
	smpd_exit(0);
    }

    /* These commands are handled by mpiexec although doing them here is an alternate solution.
    if (smpd_get_opt_two_strings(argcp, argvp, "-add_job", smpd_process.job_key, SMPD_MAX_NAME_LENGTH, smpd_process.job_key_account, SMPD_MAX_ACCOUNT_LENGTH))
    {
	if (!smpd_get_opt_string(argcp, argvp, "-host", smpd_process.console_host, SMPD_MAX_HOST_LENGTH))
	    smpd_get_hostname(smpd_process.console_host, SMPD_MAX_HOST_LENGTH);
	if (smpd_get_opt_string(argcp, argvp, "-password", smpd_process.job_key_password, SMPD_MAX_PASSWORD_LENGTH)
	    smpd_process.builtin_cmd = SMPD_CMD_ADD_JOB_KEY_AND_PASSWORD;
	else
	    smpd_process.builtin_cmd = SMPD_CMD_ADD_JOB_KEY;
	smpd_process.do_console = 1;
    }

    if (smpd_get_opt_string(argcp, argvp, "-remove_job", smpd_process.job_key, SMPD_MAX_NAME_LENGTH))
    {
	if (!smpd_get_opt_string(argcp, argvp, "-host", smpd_process.console_host, SMPD_MAX_HOST_LENGTH))
	    smpd_get_hostname(smpd_process.console_host, SMPD_MAX_HOST_LENGTH);
	smpd_process.do_console = 1;
	smpd_process.builtin_cmd = SMPD_CMD_REMOVE_JOB_KEY;
    }

    if (smpd_get_opt_string(argcp, argvp, "-associate_job", smpd_process.job_key, SMPD_MAX_NAME_LENGTH))
    {
	if (!smpd_get_opt_string(argcp, argvp, "-host", smpd_process.console_host, SMPD_MAX_HOST_LENGTH))
	    smpd_get_hostname(smpd_process.console_host, SMPD_MAX_HOST_LENGTH);
	smpd_process.do_console = 1;
	smpd_process.builtin_cmd = SMPD_CMD_ASSOCIATE_JOB_KEY;
    }
    */

    smpd_get_opt_string(argcp, argvp, "-phrase", smpd_process.passphrase, SMPD_PASSPHRASE_MAX_LENGTH);
    if (smpd_get_opt(argcp, argvp, "-getphrase"))
    {
	printf("passphrase for smpd: ");fflush(stdout);
	smpd_get_password(smpd_process.passphrase);
    }

    if (smpd_get_opt_string(argcp, argvp, "-smpdfile", smpd_process.smpd_filename, SMPD_MAX_FILENAME))
    {
	struct stat s;

	if (stat(smpd_process.smpd_filename, &s) == 0)
	{
	    if (s.st_mode & 00077)
	    {
		printf(".smpd file cannot be readable by anyone other than the current user.\n");
		smpd_exit_fn(FCNAME);
		return SMPD_FAIL;
	    }
	}
    }

    if (smpd_get_opt_string(argcp, argvp, "-traceon", filename, SMPD_MAX_FILENAME))
    {
	smpd_process.do_console_returns = SMPD_TRUE;
	if (*argcp > 1)
	{
	    for (i=1; i<*argcp; i++)
	    {
		strcpy(smpd_process.console_host, (*argvp)[i]);

		smpd_process.do_console = 1;
		smpd_process.builtin_cmd = SMPD_CMD_SET;
		strcpy(smpd_process.key, "logfile");
		strcpy(smpd_process.val, filename);
		result = smpd_do_console();
		if (result != SMPD_SUCCESS)
		{
		    smpd_err_printf("Unable to set the logfile name on host '%s'\n", smpd_process.console_host);
		    smpd_exit_fn(FCNAME);
		    return result;
		}

		smpd_process.do_console = 1;
		smpd_process.builtin_cmd = SMPD_CMD_SET;
		strcpy(smpd_process.key, "log");
		strcpy(smpd_process.val, "yes");
		result = smpd_do_console();
		if (result != SMPD_SUCCESS)
		{
		    smpd_err_printf("Unable to set the log option on host '%s'\n", smpd_process.console_host);
		    smpd_exit_fn(FCNAME);
		    return result;
		}

		smpd_process.do_console = 1;
		smpd_process.builtin_cmd = SMPD_CMD_RESTART;
		result = smpd_do_console();
		if (result != SMPD_SUCCESS)
		{
		    smpd_err_printf("Unable to restart the smpd on host '%s'\n", smpd_process.console_host);
		    smpd_exit_fn(FCNAME);
		    return result;
		}
	    }
	}
	else
	{
	    result = smpd_set_smpd_data("logfile", filename);
	    result = smpd_set_smpd_data("log", "yes");

#ifdef HAVE_WINDOWS_H
	    printf("restarting the smpd service...\n");
	    smpd_stop_service();
	    Sleep(1000);
	    smpd_start_service();
#else
	    smpd_get_hostname(smpd_process.console_host, SMPD_MAX_HOST_LENGTH);
	    smpd_process.do_console = 1;
	    smpd_process.builtin_cmd = SMPD_CMD_RESTART;
	    result = smpd_do_console();
	    if (result != SMPD_SUCCESS)
	    {
		smpd_err_printf("Unable to restart the smpd on host '%s'\n", smpd_process.console_host);
		smpd_exit_fn(FCNAME);
		return result;
	    }
#endif
	}
	smpd_exit_fn(FCNAME);
	smpd_exit(result);
    }

    if (smpd_get_opt(argcp, argvp, "-traceoff"))
    {
	smpd_process.do_console_returns = SMPD_TRUE;
	if (*argcp > 1)
	{
	    for (i=1; i<*argcp; i++)
	    {
		strcpy(smpd_process.console_host, (*argvp)[i]);

		smpd_process.do_console = 1;
		smpd_process.builtin_cmd = SMPD_CMD_SET;
		strcpy(smpd_process.key, "log");
		strcpy(smpd_process.val, "no");
		result = smpd_do_console();
		if (result != SMPD_SUCCESS)
		{
		    smpd_err_printf("Unable to set the log option on host '%s'\n", smpd_process.console_host);
		    smpd_exit_fn(FCNAME);
		    return result;
		}

		smpd_process.do_console = 1;
		smpd_process.builtin_cmd = SMPD_CMD_RESTART;
		result = smpd_do_console();
		if (result != SMPD_SUCCESS)
		{
		    smpd_err_printf("Unable to restart the smpd on host '%s'\n", smpd_process.console_host);
		    smpd_exit_fn(FCNAME);
		    return result;
		}
	    }
	}
	else
	{
	    result = smpd_set_smpd_data("log", "no");

#ifdef HAVE_WINDOWS_H
	    printf("restarting the smpd service...\n");
	    smpd_stop_service();
	    Sleep(1000);
	    smpd_start_service();
#else
	    smpd_get_hostname(smpd_process.console_host, SMPD_MAX_HOST_LENGTH);
	    smpd_process.do_console = 1;
	    smpd_process.builtin_cmd = SMPD_CMD_RESTART;
	    result = smpd_do_console();
	    if (result != SMPD_SUCCESS)
	    {
		smpd_err_printf("Unable to restart the smpd on host '%s'\n", smpd_process.console_host);
		smpd_exit_fn(FCNAME);
		return result;
	    }
#endif
	}
	smpd_exit_fn(FCNAME);
	smpd_exit(result);
    }

    if (smpd_process.do_console)
    {
	result = smpd_do_console();
	smpd_exit_fn(FCNAME);
	return result;
    }

    smpd_exit_fn(FCNAME);
    return SMPD_SUCCESS;
}
Пример #23
0
/*
 * mpiexec entry point.
 *
 * Prints the option summary and exits when no arguments are given.  Otherwise
 * it initializes PMPI, the sock layer and the smpd process state, parses the
 * command line and then either
 *   - runs the job through mpiexec_rsh() when smpd_process.rsh_mpiexec is set, or
 *   - posts a connect to the first node of smpd_process.host_list (or, on
 *     Windows with smpd_process.local_root set, to a manager started through
 *     smpd_start_win_mgr()) and runs smpd_enter_at_state().
 *
 * Returns the value of smpd_exit(smpd_process.mpiexec_exit_code) once the
 * cleanup section has run.  mpiexec_exit_code is forced to -1 when a failure
 * was detected and no other exit code had been recorded.  Failures of
 * PMPI_Init, SMPDU_Sock_init and of timeout thread creation return directly
 * from main without running the cleanup section.
 */
int main(int argc, char* argv[])
{
    int result = SMPD_SUCCESS;
    smpd_host_node_t *host_node_ptr;
    smpd_launch_node_t *launch_node_ptr;
    smpd_context_t *context;
    SMPDU_Sock_set_t set;
    SMPDU_Sock_t sock = SMPDU_SOCK_INVALID_SOCK;
    smpd_state_t state;

    smpd_enter_fn("main");

    /* catch an empty command line */
    if (argc < 2)
    {
	mp_print_options();
	exit(0);
    }

    smpd_process.mpiexec_argv0 = argv[0];

    /* initialize */
    /* FIXME: Get rid of this hack - we already create 
     * local KVS for all singleton clients by default
     */
    putenv("PMI_SMPD_FD=0");
    result = PMPI_Init(&argc, &argv);
    /* SMPD_CS_ENTER(); */
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("SMPD_Init failed,\nerror: %d\n", result);
	smpd_exit_fn("main");
	return result;
    }

    result = SMPDU_Sock_init();
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("SMPDU_Sock_init failed,\nsock error: %s\n",
		      get_sock_error_string(result));
	smpd_exit_fn("main");
	return result;
    }

    result = smpd_init_process();
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("smpd_init_process failed.\n");
	goto quit_job;
    }

    smpd_process.dbg_state = SMPD_DBG_STATE_ERROUT;

    /* parse the command line */
    smpd_dbg_printf("parsing the command line.\n");
    result = mp_parse_command_args(&argc, &argv);
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("Unable to parse the mpiexec command arguments.\n");
	goto quit_job;
    }

    /* If we are using MS HPC job scheduler we only connect
     * to the local SMPD
     */
    if(smpd_process.use_ms_hpc){
        char host[100];
        int id;
        /* Free the current host list */
        result = smpd_free_host_list();
        if(result != SMPD_SUCCESS){
            smpd_err_printf("Unable to free the global host list\n");
            goto quit_job;
        }
        /* Add local host to the host list */
        result = smpd_get_hostname(host, 100);
        if(result != SMPD_SUCCESS){
            smpd_err_printf("Unable to get the local hostname\n");
            goto quit_job;
        }
	    result = smpd_get_host_id(host, &id);
        if(result != SMPD_SUCCESS){
            smpd_err_printf("Unable to get host id for local host\n");
            goto quit_job;
        }
        /* Set the number of PMI procs since they are not launched by mpiexec */
        smpd_process.nproc = smpd_process.launch_list->nproc;
        smpd_dbg_printf("Adding (%s:%d) == (localhost) to the host list\n", host, id);
    }

    /* print and see what we've got */
    /* debugging output *************/
    smpd_dbg_printf("host tree:\n");
    host_node_ptr = smpd_process.host_list;
    if (!host_node_ptr)
	smpd_dbg_printf("<none>\n");
    while (host_node_ptr)
    {
	smpd_dbg_printf(" host: %s, parent: %d, id: %d\n",
	    host_node_ptr->host,
	    host_node_ptr->parent, host_node_ptr->id);
	host_node_ptr = host_node_ptr->next;
    }
    smpd_dbg_printf("launch nodes:\n");
    launch_node_ptr = smpd_process.launch_list;
    if (!launch_node_ptr)
	smpd_dbg_printf("<none>\n");
    while (launch_node_ptr)
    {
	smpd_dbg_printf(" iproc: %d, id: %d, exe: %s\n",
	    launch_node_ptr->iproc, launch_node_ptr->host_id,
	    launch_node_ptr->exe);
	launch_node_ptr = launch_node_ptr->next;
    }
    /* end debug output *************/

    /* set the id of the mpiexec node to zero */
    smpd_process.id = 0;

    result = SMPDU_Sock_create_set(&set);
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("SMPDU_Sock_create_set failed,\nsock error: %s\n", get_sock_error_string(result));
	goto quit_job;
    }
    smpd_process.set = set;

    /* Check to see if the user wants to use a remote shell mechanism for launching the processes
     * instead of using the smpd process managers.
     */
    if (smpd_process.rsh_mpiexec == SMPD_TRUE)
    {
	/* Do rsh or localonly stuff */
	result = mpiexec_rsh();

	/* skip over the non-rsh code and go to the cleanup section */
	goto quit_job;
    }

    /* Start the timeout mechanism if specified */
    /* This code occurs after the rsh_mpiexec option check because the rsh code implements timeouts differently */
    if (smpd_process.timeout > 0)
    {
#ifdef HAVE_WINDOWS_H
	/* create a Windows thread to sleep until the timeout expires */
	if (smpd_process.timeout_thread == NULL)
	{
	    smpd_process.timeout_thread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)timeout_thread, NULL, 0, NULL);
	    if (smpd_process.timeout_thread == NULL)
	    {
		printf("Error: unable to create a timeout thread, errno %d.\n", GetLastError());
		/* the exit trace must name the function that was entered */
		smpd_exit_fn("main");
		return SMPD_FAIL;
	    }
	}
#elif defined(SIGALRM)
	/* create an alarm to signal mpiexec when the timeout expires */
	smpd_signal(SIGALRM, timeout_function);
	alarm(smpd_process.timeout);
#elif defined(HAVE_PTHREAD_H)
	/* create a pthread to sleep until the timeout expires */
	result = pthread_create(&smpd_process.timeout_thread, NULL, timeout_thread, NULL);
	if (result != 0)
	{
	    printf("Error: unable to create a timeout thread, errno %d.\n", result);
	    /* the exit trace must name the function that was entered */
	    smpd_exit_fn("main");
	    return SMPD_FAIL;
	}
#else
	/* no timeout mechanism available */
#endif
    }

    /* make sure we have a passphrase to authenticate connections to the smpds */
    if (smpd_process.passphrase[0] == '\0')
	smpd_get_smpd_data("phrase", smpd_process.passphrase, SMPD_PASSPHRASE_MAX_LENGTH);
    if (smpd_process.passphrase[0] == '\0')
    {
	if (smpd_process.noprompt)
	{
	    printf("Error: No smpd passphrase specified through the registry or .smpd file, exiting.\n");
	    result = SMPD_FAIL;
	    goto quit_job;
	}
	printf("Please specify an authentication passphrase for smpd: ");
	fflush(stdout);
	smpd_get_password(smpd_process.passphrase);
    }

    /* Every connection path below dereferences the first host node, so fail
     * cleanly instead of dereferencing a NULL list head. */
    if (smpd_process.host_list == NULL)
    {
	smpd_err_printf("No host available to connect to, the host list is empty.\n");
	result = SMPD_FAIL;
	goto quit_job;
    }

    /* set the state to create a console session or a job session */
    state = smpd_process.do_console ? SMPD_MPIEXEC_CONNECTING_SMPD : SMPD_MPIEXEC_CONNECTING_TREE;

    result = smpd_create_context(SMPD_CONTEXT_LEFT_CHILD, set, sock, 1, &context);
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("unable to create a context for the first host in the tree.\n");
	goto quit_job;
    }
#ifdef HAVE_WINDOWS_H
    if (!smpd_process.local_root)
    {
#endif
	/* start connecting the tree by posting a connect to the first host */
	result = SMPDU_Sock_post_connect(set, context, smpd_process.host_list->host, smpd_process.port, &sock);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("Unable to connect to '%s:%d',\nsock error: %s\n",
		smpd_process.host_list->host, smpd_process.port, get_sock_error_string(result));
	    goto quit_job;
	}
#ifdef HAVE_WINDOWS_H
    }
#endif
    /* sock is still SMPDU_SOCK_INVALID_SOCK here when local_root skipped the connect above */
    context->sock = sock;
    context->state = state;
    context->connect_to = smpd_process.host_list;
#ifdef HAVE_WINDOWS_H
    if (smpd_process.local_root)
    {
	int port;
	smpd_context_t *rc_context;

	/* The local_root option is implemented by having mpiexec act as the smpd
	 * and launch the smpd manager.  Then mpiexec connects to this manager just
	 * as if it had been created by a real smpd.  This causes all the processes
	 * destined for the first smpd host to be launched by this child process of
	 * mpiexec and not the smpd service.  This allows for these processes to
	 * create windows that are visible to the interactive user.  It also means 
	 * that the job cannot be run in the context of a user other than the user
	 * running mpiexec. */

	/* get the path to smpd.exe because pszExe is currently mpiexec.exe */
	smpd_get_smpd_data("binary", smpd_process.pszExe, SMPD_MAX_EXE_LENGTH);

	/* launch the manager process */
	result = smpd_start_win_mgr(context, SMPD_FALSE);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("unable to start the local smpd manager.\n");
	    goto quit_job;
	}

	/* connect to the manager */
	smpd_dbg_printf("connecting a new socket.\n");
	port = atol(context->port_str);
	if (port < 1)
	{
	    smpd_err_printf("Invalid reconnect port read: %d\n", port);
	    /* result still holds SMPD_SUCCESS from smpd_start_win_mgr; mark the
	     * failure so the cleanup section records a non-zero exit code */
	    result = SMPD_FAIL;
	    goto quit_job;
	}
	result = smpd_create_context(context->type, context->set, SMPDU_SOCK_INVALID_SOCK, context->id, &rc_context);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("unable to create a new context for the reconnection.\n");
	    goto quit_job;
	}
	/* hand the connection bookkeeping over to the reconnect context and
	 * mark the original context as closing */
	rc_context->state = context->state;
	rc_context->write_state = SMPD_RECONNECTING;
	context->state = SMPD_CLOSING;
	rc_context->connect_to = context->connect_to;
	rc_context->connect_return_id = context->connect_return_id;
	rc_context->connect_return_tag = context->connect_return_tag;
	strcpy(rc_context->host, context->host);
	smpd_process.left_context = rc_context;
	smpd_dbg_printf("posting a re-connect to %s:%d in %s context.\n", rc_context->connect_to->host, port, smpd_get_context_str(rc_context));
	result = SMPDU_Sock_post_connect(rc_context->set, rc_context, rc_context->connect_to->host, port, &rc_context->sock);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("Unable to post a connect to '%s:%d',\nsock error: %s\n",
		rc_context->connect_to->host, port, get_sock_error_string(result));
	    /* only give up immediately when the abort command itself cannot be posted */
	    if (smpd_post_abort_command("Unable to connect to '%s:%d',\nsock error: %s\n",
		rc_context->connect_to->host, port, get_sock_error_string(result)) != SMPD_SUCCESS)
	    {
		goto quit_job;
	    }
	}
    }
    else
    {
#endif
	smpd_process.left_context = context;
	result = SMPDU_Sock_set_user_ptr(sock, context);
	if (result != SMPD_SUCCESS)
	{
	    smpd_err_printf("unable to set the smpd sock user pointer,\nsock error: %s\n",
		get_sock_error_string(result));
	    goto quit_job;
	}
#ifdef HAVE_WINDOWS_H
    }
#endif

#ifdef HAVE_WINDOWS_H
    {
	/* Create a break handler and a socket to handle aborting the job when mpiexec receives break signals */
	/* NOTE(review): the results of the setup calls below are overwritten
	 * without being checked, and reader_context is used even if
	 * smpd_create_context failed - confirm whether that is intentional. */
	smpd_context_t *reader_context;
	SMPDU_Sock_t sock_reader;
	SMPDU_SOCK_NATIVE_FD reader, writer;

	smpd_make_socket_loop((SOCKET*)&reader, (SOCKET*)&writer);
	result = SMPDU_Sock_native_to_sock(set, reader, NULL, &sock_reader);
	result = SMPDU_Sock_native_to_sock(set, writer, NULL, &smpd_process.mpiexec_abort_sock);
	result = smpd_create_context(SMPD_CONTEXT_MPIEXEC_ABORT, set, sock_reader, -1, &reader_context);
	reader_context->read_state = SMPD_READING_MPIEXEC_ABORT;
	result = SMPDU_Sock_post_read(sock_reader, &reader_context->read_cmd.cmd, 1, 1, NULL);

	if (!SetConsoleCtrlHandler(mpiexec_ctrl_handler, TRUE))
	{
	    /* Don't error out; allow the job to run without a ctrl handler? */
	    result = GetLastError();
	    smpd_dbg_printf("unable to set a ctrl handler for mpiexec, error %d\n", result);
	}
    }
#endif

    result = smpd_enter_at_state(set, state);
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("state machine failed.\n");
	goto quit_job;
    }

quit_job:

    /* record a failure exit code unless a more specific one was already set */
    if ((result != SMPD_SUCCESS) && (smpd_process.mpiexec_exit_code == 0))
    {
	smpd_process.mpiexec_exit_code = -1;
    }

    /* finalize */

    smpd_dbg_printf("calling SMPDU_Sock_finalize\n");
    result = SMPDU_Sock_finalize();
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("SMPDU_Sock_finalize failed,\nsock error: %s\n", get_sock_error_string(result));
    }

    /* SMPD_Finalize called in smpd_exit()
    smpd_dbg_printf("calling SMPD_Finalize\n");
    result = PMPI_Finalize();
    if (result != SMPD_SUCCESS)
    {
	smpd_err_printf("SMPD_Finalize failed,\nerror: %d\n", result);
    }
    */

#ifdef HAVE_WINDOWS_H
    if (smpd_process.hCloseStdinThreadEvent)
	SetEvent(smpd_process.hCloseStdinThreadEvent);
    if (smpd_process.hStdinThread != NULL)
    {
	/* close stdin so the input thread will exit */
	CloseHandle(GetStdHandle(STD_INPUT_HANDLE));
	/* give the thread three seconds to exit before terminating it */
	if (WaitForSingleObject(smpd_process.hStdinThread, 3000) != WAIT_OBJECT_0)
	{
	    TerminateThread(smpd_process.hStdinThread, 321);
	}
	CloseHandle(smpd_process.hStdinThread);
    }
    if (smpd_process.hCloseStdinThreadEvent)
    {
	CloseHandle(smpd_process.hCloseStdinThreadEvent);
	smpd_process.hCloseStdinThreadEvent = NULL;
    }
#elif defined(USE_PTHREAD_STDIN_REDIRECTION)
    smpd_cancel_stdin_thread();
#endif
    smpd_exit_fn("main");
    /* SMPD_CS_EXIT(); */
    return smpd_exit(smpd_process.mpiexec_exit_code);
}