Ejemplo n.º 1
0
/*
 * Decide whether "host" names the machine this process is running on.
 *
 * host:     host name to test.
 * is_local: output; set to 1 when a match is found, 0 otherwise.
 *
 * Failing to obtain the local host name, or failing to resolve "host",
 * is not treated as an error: the function returns with *is_local == 0.
 * A getifaddrs() failure is reported through HYDU_ERR_SETANDJUMP with
 * HYD_INTERNAL_ERROR.
 *
 * Only the first address returned by gethostbyname() is considered, and
 * only AF_INET interface addresses are compared.
 */
HYD_status HYDU_sock_is_local(char *host, int *is_local)
{
    struct hostent *ht;
    char *host_ip = NULL, *lhost_ip = NULL;
    char lhost[MAX_HOSTNAME_LEN];
    struct sockaddr_in sa;
    struct ifaddrs *ifaddr = NULL, *ifa;
    char buf[MAX_HOSTNAME_LEN];
    HYD_status status = HYD_SUCCESS;

    *is_local = 0;

    /* Algorithm used:
     *
     * 1. Find the local host name
     *    - If "host" matches the local host name, return.
     * 2. Find the IP address associated with "host" and the IP the local host
     *    resolves to.
     *    - If these IPs match, return.
     * 3. Find all local network IP addresses
     *    - If the "host" IP address matches any of the local network IP
     *      addresses, return.
     */


    /* STEP 1: If "host" matches the local host name, return */
    if (gethostname(lhost, MAX_HOSTNAME_LEN) < 0) {
        /* We can't figure out what my localhost name is.  *sigh*.  We
         * could return an error here, but we will just punt it to the
         * upper layer saying that we don't know if it is local.  We
         * cannot try steps 2 and 3 either, since we don't have our
         * local hostname. */
        goto fn_exit;
    }
    else if (!strcmp(lhost, host)) {
        *is_local = 1;
        goto fn_exit;
    }
    else {
        /* we have our local hostname, but that does not match the
         * provided hostname.  Let's try to get our remote IP address
         * first.  If we can't get that, we can give up. */
        /* If we are unable to resolve the remote host name, it need
         * not be an error. It could mean that the user is using an
         * alias for the hostname (e.g., an ssh config alias) */
        if ((ht = gethostbyname(host)) == NULL)
            goto fn_exit;

        memset((char *) &sa, 0, sizeof(struct sockaddr_in));
        memcpy(&sa.sin_addr, ht->h_addr_list[0], ht->h_length);

        /* Find the IP address of the host */
        host_ip = MPL_strdup((char *) inet_ntop(AF_INET, (const void *) &sa.sin_addr, buf,
                                                MAX_HOSTNAME_LEN));
        HYDU_ASSERT(host_ip, status);
    }

    /* OK, if we are here, we got the remote IP.  We have two ways of
     * getting the local IP: gethostbyname or getifaddrs.  We'll try
     * both.  */

    /* STEP 2: Let's try the gethostbyname model */

    if ((ht = gethostbyname(lhost))) {
        memset((char *) &sa, 0, sizeof(struct sockaddr_in));
        memcpy(&sa.sin_addr, ht->h_addr_list[0], ht->h_length);

        /* Find the IP address of the host */
        lhost_ip = MPL_strdup((char *) inet_ntop(AF_INET, (const void *) &sa.sin_addr, buf,
                                                 MAX_HOSTNAME_LEN));
        HYDU_ASSERT(lhost_ip, status);

        /* See if the IP address of the hostname we got matches the IP
         * address to which the local host resolves */
        if (!strcmp(lhost_ip, host_ip)) {
            *is_local = 1;
            goto fn_exit;
        }

        /* No match: release this string now, because step 3 reuses
         * lhost_ip for every interface address and would otherwise
         * overwrite (and leak) it. */
        MPL_free(lhost_ip);
        lhost_ip = NULL;
    }

    /* Either gethostbyname didn't resolve or we didn't find a match.
     * Either way, let's try the getifaddr model. */

    /* STEP 3: Let's try the getifaddr model */

    if (getifaddrs(&ifaddr) == -1) {
        /* Make sure the cleanup code does not try to free whatever the
         * failed call may have left in ifaddr. */
        ifaddr = NULL;
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "getifaddrs failed\n");
    }

    /* Find the IP addresses of all local interfaces */
    for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) {
        if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
            struct sockaddr_in *sa_ptr = (struct sockaddr_in *) ifa->ifa_addr;

            lhost_ip = MPL_strdup((char *)
                                  inet_ntop(AF_INET, (const void *) &(sa_ptr->sin_addr), buf,
                                            MAX_HOSTNAME_LEN));
            HYDU_ASSERT(lhost_ip, status);

            /* For each local IP address, see if it matches the "host"
             * IP address */
            if (!strcmp(host_ip, lhost_ip)) {
                *is_local = 1;
                goto fn_exit;
            }

            MPL_free(lhost_ip);
            lhost_ip = NULL;
        }
    }

  fn_exit:
    /* Single cleanup point, so the interface list is released on the
     * match path, the no-match path and the error path alike. */
    if (ifaddr)
        freeifaddrs(ifaddr);
    if (host_ip)
        MPL_free(host_ip);
    if (lhost_ip)
        MPL_free(lhost_ip);
    return status;

  fn_fail:
    goto fn_exit;
}
Ejemplo n.º 2
0
/*
 * Handle one "mcmd=spawn" PMI command for the process described by pentry.
 *
 * The function reads "name=value" lines with PMIUBufferedReadLine() until
 * it sees "endcmd", and records the values in a freshly allocated
 * ProcessApp that is appended to pentry's pending spawn list.  The
 * ProcessWorld and KVS space for the spawn are created on the first
 * command of a spawn(-multiple) sequence and reused afterwards.  Once
 * "spawnssofar" equals "totspawns", the registered userSpawner (if any) is
 * invoked, a "cmd=spawn_result" line is written to pentry->fd and the
 * pending spawn state in pentry is cleared.
 *
 * Returns 0 on success and 1 on an allocation failure or a malformed
 * command.  On a failure return the partially filled app remains linked
 * into pentry's spawn list, exactly as before.
 *
 * NOTE(review): if PMIUBufferedReadLine() returns <= 0 before "endcmd" is
 * seen, the loop simply ends and processing continues with whatever has
 * been read so far; confirm whether that should be treated as an error.
 */
static int fPMI_Handle_spawn(PMIProcess * pentry)
{
    char inbuf[PMIU_MAXLINE];
    char *(args[PMI_MAX_ARGS]);
    char key[MAXKEYLEN];
    char outbuf[PMIU_MAXLINE];
    ProcessWorld *pWorld;
    ProcessApp *app = 0;
    int preputNum = 0, rc;
    int i;
    int retval = 0;
    int totspawns = 0, spawnnum = 0;
    PMIKVSpace *kvs = 0;
    /* Variables for info */
    char curInfoKey[PMI_MAX_INFO_KEY], curInfoVal[PMI_MAX_INFO_VAL];
    int curInfoIdx = -1;

    DBG_PRINTFCOND(pmidebug, ("Entering fPMI_Handle_spawn\n"));

    if (!pentry->spawnWorld) {
        pWorld = (ProcessWorld *) MPL_malloc(sizeof(ProcessWorld), MPL_MEM_PM);
        if (!pWorld)
            return 1;

        pentry->spawnWorld = pWorld;
        pWorld->apps = 0;
        pWorld->nProcess = 0;
        pWorld->nextWorld = 0;
        pWorld->nApps = 0;
        pWorld->worldNum = pUniv.nWorlds++;
        /* FIXME: What should be the defaults for the spawned env?
         * Should the default be the env of the spawner? */
        pWorld->genv = 0;
        pentry->spawnKVS = fPMIKVSAllocate();
    } else {
        pWorld = pentry->spawnWorld;
    }
    kvs = pentry->spawnKVS;

    /* Note that each mcmd=spawn creates an app.  When all apps
     * are present, then they can be linked to a world.  A
     * spawnmultiple command makes use of multiple mcmd=spawn PMI
     * commands */

    /* Create a new app */
    app = (ProcessApp *) MPL_malloc(sizeof(ProcessApp), MPL_MEM_PM);
    if (!app)
        return 1;
    app->myAppNum = 0;
    app->exename = 0;
    app->arch = 0;
    app->path = 0;
    app->wdir = 0;
    app->hostname = 0;
    app->args = 0;
    app->nArgs = 0;
    app->soft.nelm = 0;
    app->nProcess = 0;
    app->pState = 0;
    app->nextApp = 0;
    app->env = 0;
    app->pWorld = pWorld;

    /* Add to the pentry spawn structure */
    if (pentry->spawnAppTail) {
        pentry->spawnAppTail->nextApp = app;
    } else {
        pentry->spawnApp = app;
        pWorld->apps = app;
    }
    pentry->spawnAppTail = app;

    for (i = 0; i < PMI_MAX_ARGS; i++)
        args[i] = 0;

    /* A preput_val_ line that arrives before any preput_key_ line must
     * not read an uninitialized key buffer. */
    key[0] = 0;

    /* Get lines until we find either cmd or mcmd (an error) or endcmd
     * (expected end) */
    while ((rc = PMIUBufferedReadLine(pentry, inbuf, sizeof(inbuf))) > 0) {
        char *cmdPtr, *valPtr, *p;

        /* Find the command = format */
        p = inbuf;
        /* Find first nonblank */
        while (*p && isascii(*p) && isspace(*p))
            p++;
        if (!*p) {
            /* Empty string.  Ignore */
            continue;
        }
        cmdPtr = p++;
        /* Find '=' */
        while (*p && *p != '=')
            p++;
        if (!*p) {
            /* No =.  Check for endcmd */
            p--;
            /* Trim spaces (stops at the nonblank character at cmdPtr at
             * the latest) */
            while (isascii(*p) && isspace(*p))
                p--;
            /* Add null to end */
            *++p = 0;
            if (strcmp("endcmd", cmdPtr) == 0) {
                break;
            }
            /* FIXME: Otherwise, we have a problem */
            MPL_error_printf("Malformed PMI command (no endcmd seen\n");
            retval = 1;
            goto fn_exit;
        } else {
            *p = 0;
        }

        /* Found an = .  value is the rest of the line */
        valPtr = ++p;
        while (*p && *p != '\n')
            p++;
        if (*p)
            *p = 0;     /* Remove the newline */

        /* Now, process the cmd and value */
        if (strcmp("nprocs", cmdPtr) == 0) {
            app->nProcess = atoi(valPtr);
            pWorld->nProcess += app->nProcess;
        } else if (strcmp("execname", cmdPtr) == 0) {
            app->exename = MPL_strdup(valPtr);
        } else if (strcmp("totspawns", cmdPtr) == 0) {
            /* This tells us how many separate spawn commands
             * we expect to see (e.g., for spawn multiple).
             * Each spawn command is a separate "app" */
            totspawns = atoi(valPtr);
        } else if (strcmp("spawnssofar", cmdPtr) == 0) {
            /* This tells us which app we are (starting from 1) */
            spawnnum = atoi(valPtr);
            app->myAppNum = spawnnum - 1;
        } else if (strcmp("argcnt", cmdPtr) == 0) {
            /* argcnt may not be set before the args; it is validated
             * after the whole command has been read */
            app->nArgs = atoi(valPtr);
        } else if (strncmp("arg", cmdPtr, 3) == 0) {
            int argnum;
            /* argcnt may not be set before the args */
            /* Handle arg%d.  Values are 1 - origin */
            argnum = atoi(cmdPtr + 3) - 1;
            if (argnum < 0 || argnum >= PMI_MAX_ARGS) {
                MPL_error_printf
                    ("Malformed PMI Spawn command; the index of an argument in the command is %d but must be between 0 and %d\n",
                     argnum, PMI_MAX_ARGS - 1);
                retval = 1;
                goto fn_exit;
            }
            /* A repeated index replaces the earlier value instead of
             * leaking it */
            if (args[argnum])
                MPL_free(args[argnum]);
            args[argnum] = MPL_strdup(valPtr);
        } else if (strcmp("preput_num", cmdPtr) == 0) {
            preputNum = atoi(valPtr);
        } else if (strncmp("preput_key_", cmdPtr, 11) == 0) {
            /* Save the key */
            MPL_strncpy(key, valPtr, sizeof(key));
        } else if (strncmp("preput_val_", cmdPtr, 11) == 0) {
            /* Place the key,val into the space associated with the current
             * PMI group */
            fPMIKVSAddPair(kvs, key, valPtr);
        }
        /* Info is on a per-app basis (it is an array of info items in
         * spawn multiple).  We can ignore most info values.
         * The ones that are handled are processed by a
         * separate routine (not yet implemented).
         * simple_pmi.c sends (key,value), so we can keep just the
         * last key and pass the key/value to the registered info
         * handler, along with the app structure.  Alternately,
         * we could save all info items and let the user's
         * spawner handle it */
        else if (strcmp("info_num", cmdPtr) == 0) {
            /* Number of info values */
            ;
        } else if (strncmp("info_key_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * key this is */
            curInfoIdx = atoi(cmdPtr + 9);
            MPL_strncpy(curInfoKey, valPtr, sizeof(curInfoKey));
        } else if (strncmp("info_val_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * value this is */
            int idx = atoi(cmdPtr + 9);
            if (idx != curInfoIdx) {
                MPL_error_printf
                    ("Malformed PMI command: info keys and values not ordered as expected (expected value %d but got %d)\n",
                     curInfoIdx, idx);
                retval = 1;
                goto fn_exit;
            } else {
                MPL_strncpy(curInfoVal, valPtr, sizeof(curInfoVal));
                /* Apply this info item */
                fPMIInfoKey(app, curInfoKey, curInfoVal);
                /* printf("Got info %s+%s\n", curInfoKey, curInfoVal); */
            }
        } else {
            MPL_error_printf("Unrecognized PMI subcommand on spawnmult: %s\n", cmdPtr);
            retval = 1;
            goto fn_exit;
        }
    }

    /* argcnt comes from the client; it must not exceed the number of
     * slots in args[], or the copy below would read past the array. */
    if (app->nArgs > PMI_MAX_ARGS) {
        MPL_error_printf
            ("Malformed PMI Spawn command; argcnt is %d but must be at most %d\n",
             app->nArgs, PMI_MAX_ARGS);
        app->nArgs = 0;
        retval = 1;
        goto fn_exit;
    }

    if (app->nArgs > 0) {
        app->args = (const char **) MPL_malloc(app->nArgs * sizeof(char *), MPL_MEM_PM);
        if (!app->args) {
            app->nArgs = 0;
            retval = 1;
            goto fn_exit;
        }
        for (i = 0; i < app->nArgs; i++) {
            /* Ownership of the string moves to the app */
            app->args[i] = args[i];
            args[i] = 0;
        }
    }

    pWorld->nApps++;

    /* Now that we've read the commands, invoke the user's spawn command */
    if (totspawns == spawnnum) {
        PMISetupNewGroup(pWorld->nProcess, kvs);

        if (userSpawner) {
            rc = (*userSpawner) (pWorld, userSpawnerData);
        } else {
            MPL_error_printf("Unable to spawn %s\n", app->exename);
            rc = 1;
            MPIE_PrintProcessWorld(stdout, pWorld);
        }

        MPL_snprintf(outbuf, PMIU_MAXLINE, "cmd=spawn_result rc=%d\n", rc);
        PMIWriteLine(pentry->fd, outbuf);
        DBG_PRINTFCOND(pmidebug, ("%s", outbuf));

        /* Clear for the next spawn */
        pentry->spawnApp = 0;
        pentry->spawnAppTail = 0;
        pentry->spawnKVS = 0;
        pentry->spawnWorld = 0;
    }

    /* If totspawns != spawnnum, then we are expecting a
     * spawnmult with additional items */

  fn_exit:
    /* Release every argument string that was not handed over to the app
     * (error paths, or indices at or beyond argcnt). */
    for (i = 0; i < PMI_MAX_ARGS; i++) {
        if (args[i])
            MPL_free(args[i]);
    }
    return retval;
}
Ejemplo n.º 3
0
Archivo: mpit.c Proyecto: zhanglt/mpich
/* A low-level, generic, internally used interface to register
 * a cvar with MPIR_T.
 *
 * If a cvar with the same name is already registered, the existing
 * entry is only marked active again; none of the other arguments are
 * applied.  Otherwise a new entry is appended to cvar_table, indexed by
 * name in cvar_hash and added to category "cat".
 *
 * IN: dtype, MPI datatype for this cvar
 * IN: name, Name of the cvar
 * IN: addr, Pointer to the cvar if known at registration, otherwise NULL.
 *     Ignored for MPI_CHAR cvars, which get their own storage here.
 * IN: count, # of elements of this cvar if known at registration, otherwise 0.
 *     For MPI_CHAR cvars it is the buffer size in bytes, including the
 *     terminating '\0', and must be positive.
 * IN: etype, MPI_T_enum or MPI_T_ENUM_NULL
 * IN: verb, MPI_T_PVAR_VERBOSITY_*
 * IN: binding, MPI_T_BIND_*
 * IN: scope, MPI_T_SCOPE_*
 * IN: get_addr, If not NULL, it is a callback to get address of the cvar.
 * IN: get_count, If not NULL, it is a callback to read count of the cvar.
 * IN: defaultval, Default value of the cvar; for MPI_CHAR cvars its str
 *     member (may be NULL) is copied into the newly allocated storage.
 * IN: cat, Category name of the cvar
 * IN: desc, Description of the cvar
 *
 * NOTE(review): etype is accepted but not stored by this routine.
 */
void MPIR_T_CVAR_REGISTER_impl(
    MPI_Datatype dtype, const char* name, const void *addr, int count,
    MPIR_T_enum_t *etype, MPIR_T_verbosity_t verb, MPIR_T_bind_t binding,
    MPIR_T_scope_t scope, MPIR_T_cvar_get_addr_cb get_addr,
    MPIR_T_cvar_get_count_cb get_count, MPIR_T_cvar_value_t defaultval,
    const char *cat, const char * desc)
{
    name2index_hash_t *hash_entry;
    cvar_table_entry_t *cvar;
    int cvar_idx;

    /* Check whether this is a replicated cvar, whose name is unique. */
    HASH_FIND_STR(cvar_hash, name, hash_entry);

    if (hash_entry != NULL) {
        /* Found it, the cvar already exists */
        cvar_idx = hash_entry->idx;
        cvar = (cvar_table_entry_t *)utarray_eltptr(cvar_table, cvar_idx);
        /* Should never override an existing & active var */
        MPIU_Assert(cvar->active != TRUE);
        cvar->active = TRUE;
        /* FIXME: Do we need to check consistency between the old and new? */
    } else {
        /* Not found, so push the cvar to back of cvar_table */
        utarray_extend_back(cvar_table);
        cvar = (cvar_table_entry_t *)utarray_back(cvar_table);
        cvar->active = TRUE;
        cvar->datatype = dtype;
        cvar->name = MPL_strdup(name);
        MPIU_Assert(cvar->name);
        if (dtype != MPI_CHAR) {
            cvar->addr = (void *)addr;
        } else {
            /* The buffer must hold at least the terminating '\0' that is
             * written below. */
            MPIU_Assert(count > 0);
            cvar->addr = MPL_malloc(count);
            MPIU_Assert(cvar->addr);
            if (defaultval.str == NULL) {
                ((char *)(cvar->addr))[0] = '\0';
            } else {
                /* Use greater (>), since count includes the terminating '\0', but strlen does not */
                MPIU_Assert((size_t) count > strlen(defaultval.str));
                strcpy(cvar->addr, defaultval.str);
            }
        }
        cvar->count = count;
        cvar->verbosity = verb;
        cvar->bind = binding;
        cvar->scope = scope;
        cvar->get_addr = get_addr;
        cvar->get_count = get_count;
        cvar->defaultval = defaultval;
        cvar->desc = MPL_strdup(desc);
        MPIU_Assert(cvar->desc);

        /* Record <name, index> in hash table */
        cvar_idx = utarray_len(cvar_table) - 1;
        hash_entry = MPL_malloc(sizeof(name2index_hash_t));
        MPIU_Assert(hash_entry);
        /* No second copy of the name is needed: the key points at the copy
         * owned by the cvar_table entry, which co-exists with cvar_hash.
         * (The caller's string is not used as the key, since nothing
         * guarantees it stays valid after this call.) */
        hash_entry->name = cvar->name;
        hash_entry->idx = cvar_idx;
        HASH_ADD_KEYPTR(hh, cvar_hash, hash_entry->name,
                        strlen(hash_entry->name), hash_entry);

        /* Add the cvar to a category */
        MPIR_T_cat_add_cvar(cat, cvar_idx);
    }
}
Ejemplo n.º 4
0
/*
 * Expand one node-group string and add every node it names to
 * global_node_list.
 *
 * str is either a plain node name ("node7") or a prefix followed by a
 * bracketed, comma-separated list of numbers and ranges
 * ("node[01-03,7]").  Each generated name is the prefix followed by the
 * number, padded to the width of the range's start string.  The task
 * count for each added node is taken from tasks_per_node[], consumed in
 * order starting at index 0.
 *
 * str is modified in place (the brackets and separators are overwritten
 * with NULs).
 *
 * Returns the status of the first failing helper, HYD_SUCCESS otherwise.
 */
static HYD_status group_to_nodes(char *str)
{
    char *nodes, *tmp, *start_str, *end_str, **set = NULL;
    int start, end, i, j, k = 0, nsets;
    HYD_status status = HYD_SUCCESS;

    for (tmp = str; *tmp != '[' && *tmp != 0; tmp++);

    if (*tmp == 0) {    /* only one node in the group */
        status = HYDU_add_to_node_list(str, tasks_per_node[k++], &global_node_list);
        HYDU_ERR_POP(status, "unable to add to node list\n");

        goto fn_exit;
    }

    /* more than one node in the group */
    *tmp = 0;
    nodes = tmp + 1;

    for (tmp = nodes; *tmp != ']' && *tmp != 0; tmp++);
    *tmp = 0;   /* remove the closing ']' */

    /* Upper bound on the number of sets: one more than the number of
     * commas.  Counting in place avoids the scratch copy that the
     * tokenizer would otherwise need (and that was never freed). */
    nsets = 1;
    for (tmp = nodes; *tmp; tmp++) {
        if (*tmp == ',')
            nsets++;
    }

    /* One extra slot for the terminating NULL entry */
    HYDU_MALLOC_OR_JUMP(set, char **, (nsets + 1) * sizeof(char *), status);

    /* Find the actual node sets.  All sets are collected before any of
     * them is split on '-', because strtok keeps a single global
     * position. */
    set[0] = strtok(nodes, ",");
    for (i = 1; set[i - 1]; i++)
        set[i] = strtok(NULL, ",");

    for (i = 0; set[i]; i++) {
        start_str = strtok(set[i], "-");
        if ((end_str = strtok(NULL, "-")) == NULL)
            end_str = start_str;

        start = atoi(start_str);
        end = atoi(end_str);

        for (j = start; j <= end; j++) {
            char *node_str[HYD_NUM_TMP_STRINGS];

            node_str[0] = MPL_strdup(str);
            node_str[1] = HYDU_int_to_str_pad(j, strlen(start_str));
            node_str[2] = NULL;

            status = HYDU_str_alloc_and_join(node_str, &tmp);
            /* Release the pieces before checking the status so they are
             * not leaked when the join fails. */
            HYDU_free_strlist(node_str);
            HYDU_ERR_POP(status, "unable to join strings\n");

            /* NOTE(review): tmp is not freed here; that is only right if
             * HYDU_add_to_node_list takes ownership of the string --
             * confirm against its definition. */
            status = HYDU_add_to_node_list(tmp, tasks_per_node[k++], &global_node_list);
            HYDU_ERR_POP(status, "unable to add to node list\n");
        }
    }

  fn_exit:
    /* The entries of set point into str; only the array itself is ours. */
    if (set)
        MPL_free(set);
    return status;

  fn_fail:
    goto fn_exit;
}
Ejemplo n.º 5
0
/*
 * Post-fork hook run for a newly created process.
 *
 * It performs the client-side setup for I/O labeling and PMI using the
 * SetupInfo passed in predata, and then rewrites the application's
 * command line so that the program is started through /usr/bin/ssh on
 * pState->hostname:
 *
 *     /usr/bin/ssh -Y <hostname> <PMI_PORT setting> <PMI_ID setting>
 *                  [<PMI_DEBUG setting>] <original exename> <original args>
 *
 * predata: SetupInfo for this process.
 * data:    not used by this routine.
 * pState:  state of the process; its app's exename, args and nArgs are
 *          replaced.
 *
 * Returns 0.  The process exits with status 1 if no host name is
 * available or the new argument vector cannot be allocated.
 */
int mypostfork( void *predata, void *data, ProcessState *pState )
{
    SetupInfo *s = (SetupInfo *)predata;
    int curarg=0;

    IOLabelSetupInClient( &s->labelinfo );
    PMISetupInClient( 1, &s->pmiinfo );

    /* Now, we *also* change the process state to insert the
       interposed remote shell routine.  This is probably not
       where we want this in the final version (because MPIE_ExecProgram
       does a lot under the assumption that the started program will
       know what to do with new environment variables), but this
       will allow us to start. */
    {
        ProcessApp *app = pState->app;
        const char **newargs = 0;
        char *pmiDebugStr = 0;
        int j;
        char rankStr[12];

        if (!pState->hostname) {
            MPL_error_printf( "No hostname avaliable for %s\n", app->exename );
            exit(1);
        }

        /* Insert into app->args.
           NOTE(review): the 14 extra slots have to cover "-Y", the host
           name, up to three environment settings and the original
           executable name; how many slots AddEnvSetToCmdLine uses per
           setting is not visible here -- confirm. */
        newargs = (const char **) MPL_malloc( (app->nArgs + 14 + 1) *
                                              sizeof(char *) );
        if (!newargs) {
            /* Without the argument vector nothing can be started; fail
               the same way as the missing-hostname case instead of
               dereferencing a null pointer below. */
            MPL_error_printf( "Unable to allocate argument list for %s\n",
                              app->exename );
            exit(1);
        }

        snprintf( rankStr, sizeof(rankStr)-1, "%d", pState->id );
        rankStr[12-1] = 0;
        curarg = 0;
        newargs[curarg++] = MPL_strdup( "-Y" );

        newargs[curarg++] = pState->hostname;
        curarg += AddEnvSetToCmdLine( "PMI_PORT", s->pmiinfo.portName,
                                      newargs + curarg );
        curarg += AddEnvSetToCmdLine( "PMI_ID", rankStr, newargs + curarg );
        pmiDebugStr = getenv( "PMI_DEBUG" );
        if (pmiDebugStr) {
            /* Use this to help debug the connection process */
            curarg += AddEnvSetToCmdLine( "PMI_DEBUG", pmiDebugStr,
                                          newargs + curarg );
        }

        /* The original program and its arguments follow the ssh options */
        newargs[curarg++] = app->exename;
        for (j=0; j<app->nArgs; j++) {
            newargs[j+curarg] = app->args[j];
        }
        newargs[j+curarg] = 0;
        app->exename = MPL_strdup( "/usr/bin/ssh" );

        app->args = newargs;
        app->nArgs += curarg;

        if (MPIE_Debug) {
            printf( "cmd = %s\n", app->exename ); fflush(stdout);
            printf( "Number of args = %d\n", app->nArgs );
            for (j=0; j<app->nArgs; j++) {
                printf( "argv[%d] = %s\n", j, app->args[j] ); fflush(stdout);
            }
        }
    }

    return 0;
}
Ejemplo n.º 6
0
/*
 * Handle an "info-getnodeattr" request arriving on fd.
 *
 * The "key" token is looked up in the local KVS key/value list:
 *   - found:      reply with found=TRUE and the stored value;
 *   - not found and the request carries wait=TRUE: queue the request in
 *     pending_reqs and send nothing for now;
 *   - otherwise:  reply with found=FALSE.
 * A "thrid" token, when present, is echoed back in the reply.
 *
 * Returns HYD_SUCCESS, or the error raised by a failing helper/macro.
 */
static HYD_status fn_info_getnodeattr(int fd, char *args[])
{
    struct HYD_pmcd_token *tokens = NULL;
    int token_count;
    char *key, *waitval, *thrid;
    struct HYD_pmcd_pmi_kvs_pair *match;
    struct HYD_string_stash stash;
    char *cmd;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n");

    waitval = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "wait");
    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    /* if a predefined value is not found, we let the code fall back
     * to regular search and return an error to the client */

    /* Walk the list until the key matches; match ends up NULL when the
     * attribute is unknown. */
    match = HYD_pmcd_pmip.local.kvs->key_pair;
    while (match && strcmp(match->key, key) != 0)
        match = match->next;

    if (match == NULL && waitval && strcmp(waitval, "TRUE") == 0) {
        /* The client wants to wait for a response; queue up the request */
        status = HYD_pmcd_pmi_v2_queue_req(fd, -1, -1, args, key, &pending_reqs);
        HYDU_ERR_POP(status, "unable to queue request\n");

        goto fn_exit;
    }

    /* Both remaining outcomes answer immediately and share the same
     * response prefix. */
    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getnodeattr-response;"), status);
    if (thrid) {
        HYD_STRING_STASH(stash, MPL_strdup("thrid="), status);
        HYD_STRING_STASH(stash, MPL_strdup(thrid), status);
        HYD_STRING_STASH(stash, MPL_strdup(";"), status);
    }

    if (match != NULL) {
        /* We found the attribute */
        HYD_STRING_STASH(stash, MPL_strdup("found=TRUE;value="), status);
        HYD_STRING_STASH(stash, MPL_strdup(match->val), status);
        HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status);
    } else {
        /* Tell the client that we can't find the attribute */
        HYD_STRING_STASH(stash, MPL_strdup("found=FALSE;rc=0;"), status);
    }

    HYD_STRING_SPIT(stash, cmd, status);

    send_cmd_downstream(fd, cmd);
    MPL_free(cmd);

  fn_exit:
    if (tokens)
        HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Ejemplo n.º 7
0
/*
 * Handle a PMI "fullinit" request arriving on fd.
 *
 * The "pmirank" token identifies the downstream process; fd is recorded
 * as that process's PMI descriptor and marked active.  A
 * "fullinit-response" command carrying the rank, the global process
 * count, appnum=0, the spawner job id (when one is set) and the debug
 * flags is then sent back on fd.
 *
 * Returns HYD_SUCCESS, or the error raised by a failing helper/macro
 * (missing pmirank token, unknown rank, ...).
 */
static HYD_status fn_fullinit(int fd, char *args[])
{
    int id, i;
    char *rank_str;
    struct HYD_string_stash stash;
    char *cmd;
    /* Initialized so that the cleanup code can tell whether the token
     * array was ever produced; the parse step itself may fail. */
    struct HYD_pmcd_token *tokens = NULL;
    int token_count = 0;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    rank_str = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "pmirank");
    HYDU_ERR_CHKANDJUMP(status, rank_str == NULL, HYD_INTERNAL_ERROR,
                        "unable to find pmirank token\n");
    id = atoi(rank_str);

    /* Store the PMI_RANK to fd mapping */
    for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++) {
        if (HYD_pmcd_pmip.downstream.pmi_rank[i] == id) {
            HYD_pmcd_pmip.downstream.pmi_fd[i] = fd;
            HYD_pmcd_pmip.downstream.pmi_fd_active[i] = 1;
            break;
        }
    }
    /* The loop ran to completion only if no process has this rank */
    HYDU_ASSERT(i < HYD_pmcd_pmip.local.proxy_process_count, status);

    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash,
                     MPL_strdup("cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank="),
                     status);
    HYD_STRING_STASH(stash, HYDU_int_to_str(id), status);

    HYD_STRING_STASH(stash, MPL_strdup(";size="), status);
    HYD_STRING_STASH(stash, HYDU_int_to_str(HYD_pmcd_pmip.system_global.global_process_count),
                     status);
    HYD_STRING_STASH(stash, MPL_strdup(";appnum=0"), status);
    if (HYD_pmcd_pmip.local.spawner_kvsname) {
        HYD_STRING_STASH(stash, MPL_strdup(";spawner-jobid="), status);
        HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.local.spawner_kvsname), status);
    }
    if (HYD_pmcd_pmip.user_global.debug) {
        HYD_STRING_STASH(stash, MPL_strdup(";debugged=TRUE;pmiverbose=TRUE"), status);
    } else {
        HYD_STRING_STASH(stash, MPL_strdup(";debugged=FALSE;pmiverbose=FALSE"), status);
    }
    HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status);

    HYD_STRING_SPIT(stash, cmd, status);

    send_cmd_downstream(fd, cmd);
    MPL_free(cmd);

  fn_exit:
    /* Only free what was actually produced */
    if (tokens)
        HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Ejemplo n.º 8
0
/*
 * Launch the proxies for all hosts in proxy_list with a single "srun"
 * invocation.
 *
 * args:       NULL-terminated proxy command line, appended after the
 *             srun options.
 * proxy_list: list of proxies; one srun task is requested per entry.
 * use_rmk:    when HYD_FALSE, or when the resource manager in use is not
 *             "slurm", an explicit --nodelist built from proxy_list is
 *             passed to srun.
 * control_fd: not used by this routine.
 *
 * The new process's pid is appended to HYD_bscu_pid_list, and its
 * stdout/stderr descriptors (fd_stdout / fd_stderr, which are defined
 * outside this function) are appended to HYD_bscu_fd_list and
 * registered with the demux engine.
 *
 * Returns HYD_SUCCESS, or the error raised by a failing helper.
 *
 * NOTE(review): nothing bounds idx against HYD_NUM_TMP_STRINGS; a long
 * "args" list or many HYDRA_LAUNCHER_EXTRA_ARGS entries could overflow
 * targs.
 */
HYD_status HYDT_bscd_slurm_launch_procs(char **args, struct HYD_proxy *proxy_list, int use_rmk,
                                        int *control_fd)
{
    int num_hosts, idx, i;
    int *pid, *fd_list;
    /* Zero-filled so the list is NULL-terminated at every point; the
     * cleanup code frees it with HYDU_free_strlist even when we bail out
     * before the explicit terminator is stored. */
    char *targs[HYD_NUM_TMP_STRINGS] = { NULL };
    char *node_list_str = NULL;
    char *path = NULL, *extra_arg_list = NULL, *extra_arg_copy = NULL, *extra_arg;
    struct HYD_proxy *proxy;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* We use the following priority order for the executable path:
     * (1) user-specified; (2) search in path; (3) Hard-coded
     * location */
    if (HYDT_bsci_info.launcher_exec)
        path = MPL_strdup(HYDT_bsci_info.launcher_exec);
    if (!path)
        path = HYDU_find_full_path("srun");
    if (!path)
        path = MPL_strdup("/usr/bin/srun");

    idx = 0;
    targs[idx++] = MPL_strdup(path);

    if (use_rmk == HYD_FALSE || strcmp(HYDT_bsci_info.rmk, "slurm")) {
        targs[idx++] = MPL_strdup("--nodelist");

        status = proxy_list_to_node_str(proxy_list, &node_list_str);
        HYDU_ERR_POP(status, "unable to build a node list string\n");

        targs[idx++] = MPL_strdup(node_list_str);
    }

    num_hosts = 0;
    for (proxy = proxy_list; proxy; proxy = proxy->next)
        num_hosts++;

    targs[idx++] = MPL_strdup("-N");
    targs[idx++] = HYDU_int_to_str(num_hosts);

    targs[idx++] = MPL_strdup("-n");
    targs[idx++] = HYDU_int_to_str(num_hosts);

    /* Force srun to ignore stdin to avoid issues with
     * unexpected files open on fd 0 */
    targs[idx++] = MPL_strdup("--input");
    targs[idx++] = MPL_strdup("none");

    MPL_env2str("HYDRA_LAUNCHER_EXTRA_ARGS", (const char **) &extra_arg_list);
    if (extra_arg_list) {
        /* strtok writes NULs into the string it scans.  The pointer
         * handed back by MPL_env2str presumably refers to the
         * environment's own storage (TODO confirm), so tokenize a
         * private copy and leave the variable intact for later reads
         * and for child processes. */
        extra_arg_copy = MPL_strdup(extra_arg_list);
        if (extra_arg_copy) {
            extra_arg = strtok(extra_arg_copy, " ");
            while (extra_arg) {
                targs[idx++] = MPL_strdup(extra_arg);
                extra_arg = strtok(NULL, " ");
            }
            MPL_free(extra_arg_copy);
            extra_arg_copy = NULL;
        }
    }

    /* Fill in the remaining arguments */
    /* We do not need to create a quoted version of the string for
     * SLURM. It seems to be internally quoting it anyway. */
    for (i = 0; args[i]; i++)
        targs[idx++] = MPL_strdup(args[i]);

    /* Increase pid list to accommodate the new pid */
    HYDU_MALLOC_OR_JUMP(pid, int *, (HYD_bscu_pid_count + 1) * sizeof(int), status);
    for (i = 0; i < HYD_bscu_pid_count; i++)
        pid[i] = HYD_bscu_pid_list[i];
    MPL_free(HYD_bscu_pid_list);
    HYD_bscu_pid_list = pid;

    /* Increase fd list to accommodate these new fds */
    HYDU_MALLOC_OR_JUMP(fd_list, int *, (HYD_bscu_fd_count + 3) * sizeof(int), status);
    for (i = 0; i < HYD_bscu_fd_count; i++)
        fd_list[i] = HYD_bscu_fd_list[i];
    MPL_free(HYD_bscu_fd_list);
    HYD_bscu_fd_list = fd_list;

    /* append proxy ID as -1 */
    targs[idx++] = HYDU_int_to_str(-1);
    targs[idx++] = NULL;

    if (HYDT_bsci_info.debug) {
        HYDU_dump(stdout, "Launch arguments: ");
        HYDU_print_strlist(targs);
    }

    status = HYDU_create_process(targs, NULL, NULL, &fd_stdout, &fd_stderr,
                                 &HYD_bscu_pid_list[HYD_bscu_pid_count++], -1);
    HYDU_ERR_POP(status, "create process returned error\n");

    HYD_bscu_fd_list[HYD_bscu_fd_count++] = fd_stdout;
    HYD_bscu_fd_list[HYD_bscu_fd_count++] = fd_stderr;

    status = HYDT_dmx_register_fd(1, &fd_stdout, HYD_POLLIN,
                                  (void *) (size_t) STDOUT_FILENO, HYDT_bscu_stdio_cb);
    HYDU_ERR_POP(status, "demux returned error registering fd\n");

    status = HYDT_dmx_register_fd(1, &fd_stderr, HYD_POLLIN,
                                  (void *) (size_t) STDERR_FILENO, HYDT_bscu_stdio_cb);
    HYDU_ERR_POP(status, "demux returned error registering fd\n");

  fn_exit:
    if (node_list_str)
        MPL_free(node_list_str);
    HYDU_free_strlist(targs);
    if (path)
        MPL_free(path);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Ejemplo n.º 9
0
        publish->next = NULL;

        if (HYD_pmcd_pmi_publish_list == NULL)
            HYD_pmcd_pmi_publish_list = publish;
        else {
            for (r = HYD_pmcd_pmi_publish_list; r->next; r = r->next);
            r->next = publish;
        }
    }
    else {
        int len, recvd, closed;
        char *resp;

        /* connect to the external nameserver and store the
         * information there */
        ns = MPL_strdup(HYD_server_info.nameserver);

        ns_host = strtok(ns, ":");
        HYDU_ASSERT(ns_host, status);

        ns_port_str = strtok(NULL, ":");
        if (ns_port_str)
            ns_port = atoi(ns_port_str);
        else
            ns_port = HYDRA_NAMESERVER_DEFAULT_PORT;

        status = HYDU_sock_connect(ns_host, (uint16_t) ns_port, &ns_fd, 0, HYD_CONNECT_DELAY);
        HYDU_ERR_POP(status, "error connecting to the nameserver\n");

        HYD_STRING_STASH_INIT(stash);
        HYD_STRING_STASH(stash, MPL_strdup("PUBLISH"), status);