Пример #1
0
/*
 * Handle an incoming "put" command.
 *
 * Fetches the "kvsname", "key" and "value" fields with PMIU_getval, adds the
 * pair to the named key-value space and writes one reply line of the form
 *     cmd=put_result rc=<0|-1> msg=<text>
 * to pentry->fd.  rc is 0 when the pair was added and -1 when the space was
 * not found, the key already existed (fPMIKVSAddPair returned 1) or the
 * space reported an error (fPMIKVSAddPair returned -1).
 *
 * Every msg text is written without blanks, like the other messages built
 * here, so that it stays one key=value token in the reply
 * (NOTE(review): presumably the client splits the line on blanks - confirm
 * against the client-side parser).
 *
 * The function itself always returns 0; failures are only reported to the
 * peer through the rc field.
 */
static int fPMI_Handle_put(PMIProcess * pentry)
{
    int rc = 0;
    PMIKVSpace *kvs;
    char kvsname[MAXKVSNAME];
    char message[PMIU_MAXLINE], outbuf[PMIU_MAXLINE];
    char key[MAXKEYLEN], val[MAXVALLEN];

    PMIU_getval("kvsname", kvsname, MAXKVSNAME);
    DBG_PRINTFCOND(pmidebug, ("Put: Finding kvs %s\n", kvsname));

    kvs = fPMIKVSFindSpace(kvsname);
    if (kvs) {
        PMIU_getval("key", key, MAXKEYLEN);
        PMIU_getval("value", val, MAXVALLEN);
        rc = fPMIKVSAddPair(kvs, key, val);
        if (rc == 1) {
            /* no duplicate keys allowed: report it as a failure */
            rc = -1;
            MPL_snprintf(message, PMIU_MAXLINE, "duplicate_key_%s", key);
        } else if (rc == -1) {
            /* rc already carries the failure value sent to the peer */
            MPL_snprintf(message, PMIU_MAXLINE, "no_room_in_kvs_%s", kvsname);
        } else {
            /* any other return value is treated as success */
            rc = 0;
            MPL_strncpy(message, "success", PMIU_MAXLINE);
        }
    } else {
        rc = -1;
        MPL_snprintf(message, PMIU_MAXLINE, "kvs_%s_not_found", kvsname);
    }
    MPL_snprintf(outbuf, PMIU_MAXLINE, "cmd=put_result rc=%d msg=%s\n", rc, message);
    PMIWriteLine(pentry->fd, outbuf);
    return 0;
}
Пример #2
0
/*
 * Client side of the PMI server setup: tell the process that is about to be
 * exec'ed how to reach the server.
 *
 * usePort == 0: the read end pmiinfo->fdpair[0] is closed and "PMI_FD=<fd>"
 *               (fd = pmiinfo->fdpair[1]) is added to the environment.
 * usePort != 0: "PMI_PORT=<pmiinfo->portName>" is added; a missing port name
 *               is an error.
 *
 * The strings handed to MPIE_Putenv live in static buffers because putenv
 * does not copy them; that is acceptable since this routine runs only once,
 * in the forked child before the exec.  MPIE_Putenv is used instead of
 * putenv so that the variables are added after any -(g)envnone cleanup of
 * the environment.
 *
 * Returns 0 on success and 1 on any failure.
 */
int PMISetupInClient(int usePort, PMISetup * pmiinfo)
{
    static char fd_env[100];
    static char port_env[1024];

    if (usePort != 0) {
        /* We must communicate the port name to the process */
        if (!pmiinfo->portName) {
            MPL_internal_error_printf("Required portname was not defined\n");
            return 1;
        }

        MPL_snprintf(port_env, sizeof(port_env), "PMI_PORT=%s", pmiinfo->portName);
        if (MPIE_Putenv(pmiinfo->pWorld, port_env)) {
            MPL_internal_error_printf("Could not set environment PMI_PORT");
            perror("Reason: ");
            return 1;
        }
        return 0;
    }

    /* fd-based setup: the child keeps only its own end of the pair */
    close(pmiinfo->fdpair[0]);
    MPL_snprintf(fd_env, sizeof(fd_env), "PMI_FD=%d", pmiinfo->fdpair[1]);
    if (MPIE_Putenv(pmiinfo->pWorld, fd_env)) {
        MPL_internal_error_printf("Could not set environment PMI_FD");
        return 1;
    }

    /* Marking the process as spawned (PMI_SPAWNED=1) is intentionally not
     * done here. */
    return 0;
}
Пример #3
0
/* Server side of the singleton init handshake (the protocol is discussed in
   simplepmi.c).

   Sequence on fd: send "cmd=singinit ...", read the peer's reply and require
   its cmd to be "singinit", then send "cmd=singinit_info ..." carrying
   versionok=yes|no and the kvsname of pmiprocess' group.

   Returns -1 when the peer answers with a different command, 0 otherwise
   (a version mismatch is only reported to the peer through versionok=no). */
int PMI_InitSingletonConnection(int fd, PMIProcess * pmiprocess)
{
    char line[PMIU_MAXLINE], cmd[PMIU_MAXLINE];
    char version[PMIU_MAXLINE], subversion[PMIU_MAXLINE];
    const char *versionok;

    /* Step 1: announce ourselves */
    MPL_snprintf(line, PMIU_MAXLINE,
                 "cmd=singinit pmi_version=%d pmi_subversion=%d stdio=no authtype=none\n",
                 PMI_VERSION, PMI_SUBVERSION);
    PMIWriteLine(fd, line);

    /* Step 2: the client must answer with its own singinit */
    PMIReadLine(fd, line, PMIU_MAXLINE);
    PMIU_parse_keyvals(line);
    PMIU_getval("cmd", cmd, MAXPMICMD);
    if (strcmp(cmd, "singinit") != 0) {
        PMIU_printf(1, "Unexpected cmd %s\n", cmd);
        return -1;
    }

    /* Step 3: compare versions (authtype is not examined): same major
     * version, and our subversion at least as large as the client's */
    PMIU_getval("pmi_version", version, PMIU_MAXLINE);
    PMIU_getval("pmi_subversion", subversion, PMIU_MAXLINE);
    if (PMI_VERSION != atoi(version) || PMI_SUBVERSION < atoi(subversion))
        versionok = "no";
    else
        versionok = "yes";

    /* Step 4: send the verdict together with the kvs name */
    MPL_snprintf(line, PMIU_MAXLINE,
                 "cmd=singinit_info versionok=%s stdio=no kvsname=%s\n",
                 versionok, (char *) (pmiprocess->group->kvs->kvsname));
    PMIWriteLine(fd, line);

    return 0;
}
Пример #4
0
/*
 * Restart a set of ranks from a BLCR checkpoint image.
 *
 * Steps, in order:
 *   1. create the stdin/out/err listener socket and append its port number
 *      to envlist under STDINOUTERR_PORT_NAME;
 *   2. write the environment files for the given ranks (create_env_file);
 *   3. open "<prefix>/context-num<ckpt_num>-<pgid>-<id>" read-only and pass
 *      the descriptor to cr_request_restart() with CR_RSTRT_RESTORE_PID;
 *   4. close the descriptor and wait for the stdio sockets / pids of the
 *      restarted ranks (wait_for_stdinouterr_sockets fills in, out, err, pid).
 *
 * Returns HYD_SUCCESS, or the status of the first step that failed.  The
 * checkpoint descriptor is closed on every path, including failures that
 * happen after it was opened.
 */
HYD_status HYDT_ckpoint_blcr_restart(const char *prefix, int pgid, int id, int ckpt_num,
                                     struct HYD_env *envlist, int num_ranks, int ranks[],
                                     int *in, int *out, int *err, int *pid)
{
    HYD_status status = HYD_SUCCESS;
    int ret;
    int context_fd = -1;        /* -1 means "nothing to close" in fn_fail */
    cr_restart_handle_t cr_handle;
    cr_restart_args_t args;
    char filename[256];
    char port_str[64];
    int port;

    HYDU_FUNC_ENTER();

    /* create listener socket for stdin/out/err */
    status = create_stdinouterr_sock(&port);
    HYDU_ERR_POP(status, "failed to create stdin/out/err socket\n");
    MPL_snprintf(port_str, sizeof(port_str), "%d", port);
    status = HYDU_append_env_to_list(STDINOUTERR_PORT_NAME, port_str, &envlist);
    HYDU_ERR_POP(status, "failed to add to env list\n");

    status = create_env_file(envlist, num_ranks, ranks);
    if (status)
        HYDU_ERR_POP(status, "blcr restart\n");

    /* open the checkpoint file */
    MPL_snprintf(filename, sizeof(filename), "%s/context-num%d-%d-%d", prefix, ckpt_num, pgid,
                 id);
    context_fd = open(filename, O_RDONLY /* | O_LARGEFILE */);
    HYDU_ERR_CHKANDJUMP(status, context_fd < 0, HYD_INTERNAL_ERROR, "open failed, %s\n",
                        strerror(errno));

    /* ... initialize the request structure */
    cr_initialize_restart_args_t(&args);
    args.cr_fd = context_fd;
    args.cr_flags = CR_RSTRT_RESTORE_PID;

    /* ... issue the request */
    ret = cr_request_restart(&args, &cr_handle);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "cr_request_restart failed, %s\n",
                        strerror(errno));

    ret = close(context_fd);
    /* Whatever close() returned, the descriptor must not be closed a second
     * time by the failure path.  A plain assignment leaves errno untouched. */
    context_fd = -1;
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close failed, %s\n",
                        strerror(errno));

    /* get fds for stdin/out/err sockets, and get pids of restarted processes */
    status = wait_for_stdinouterr_sockets(num_ranks, ranks, in, out, err, pid);
    if (status)
        HYDU_ERR_POP(status, "blcr restart\n");

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    /* Release the checkpoint descriptor if a failure occurred while it was
     * still open (e.g. cr_request_restart failed). */
    if (context_fd >= 0)
        close(context_fd);
    goto fn_exit;
}
Пример #5
0
/*
 * Look up the business card of process `rank` in the key-value space
 * pg->connData and store it in buf (capacity bufsize).
 *
 * The key is "P<rank>-businesscard".  If the first lookup fails,
 * MPIDI_PG_CheckForSingleton() is called and the lookup is retried once.
 *
 * Returns MPI_SUCCESS or an MPI error code.  The key-formatting check treats
 * a return value equal to the buffer size as truncation as well, which is
 * the snprintf convention (return value >= size means the output did not
 * fit).
 */
static int getConnInfoKVS( int rank, char *buf, int bufsize, MPIDI_PG_t *pg )
{
#ifdef USE_PMI2_API
    char key[MPIDI_MAX_KVS_KEY_LEN];
    int  mpi_errno = MPI_SUCCESS, rc;
    int vallen;

    rc = MPL_snprintf(key, MPIDI_MAX_KVS_KEY_LEN, "P%d-businesscard", rank );
    if (rc < 0 || rc >= MPIDI_MAX_KVS_KEY_LEN) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
    }

    mpi_errno = PMI2_KVS_Get(pg->connData, PMI2_ID_NULL, key, buf, bufsize, &vallen);
    if (mpi_errno) {
        /* retry once after the singleton check */
        MPIDI_PG_CheckForSingleton();
        mpi_errno = PMI2_KVS_Get(pg->connData, PMI2_ID_NULL, key, buf, bufsize, &vallen);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }
 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
#else
    char key[MPIDI_MAX_KVS_KEY_LEN];
    int  mpi_errno = MPI_SUCCESS, rc, pmi_errno;

    rc = MPL_snprintf(key, MPIDI_MAX_KVS_KEY_LEN, "P%d-businesscard", rank );
    if (rc < 0 || rc >= MPIDI_MAX_KVS_KEY_LEN) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
    }

    /* both lookups happen inside the PMI critical section */
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX);
    pmi_errno = PMI_KVS_Get(pg->connData, key, buf, bufsize );
    if (pmi_errno) {
        /* retry once after the singleton check */
        MPIDI_PG_CheckForSingleton();
        pmi_errno = PMI_KVS_Get(pg->connData, key, buf, bufsize );
    }
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_PMI_MUTEX);
    if (pmi_errno) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**pmi_kvs_get");
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
#endif
}
Пример #6
0
/* Obtain a color from random_color() and format its three components as
 * "%3d %3d %3d" into random_color_str (a buffer declared outside this
 * function, written with a limit of MAX_RANDOM_COLOR_STR).  The returned
 * pointer is that same buffer, so each call overwrites the previous result. */
static char *get_random_color_str(void)
{
    unsigned char red, green, blue;

    random_color(&red, &green, &blue);
    MPL_snprintf(random_color_str, MAX_RANDOM_COLOR_STR,
                 "%3d %3d %3d", (int) red, (int) green, (int) blue);

    return random_color_str;
}
Пример #7
0
/*
 * Send one PMI command string to fd.
 *
 * Two writes are issued: first a 6-character field holding strlen(cmd)
 * formatted with "%6u", then the command text itself.  When the debug flag
 * in HYD_pmcd_pmip.user_global is set the command is also dumped to stdout.
 *
 * Returns the status produced by the write/assert macros.
 */
static HYD_status send_cmd_downstream(int fd, const char *cmd)
{
    char len_field[7];          /* 6 characters + terminating NUL */
    int nsent, peer_closed;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    MPL_snprintf(len_field, sizeof(len_field), "%6u", (unsigned) strlen(cmd));
    status = HYDU_sock_write(fd, len_field, 6, &nsent, &peer_closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    /* FIXME: aborting here is not right when data cannot be sent
     * downstream; the caller should decide whether that is fatal. */
    HYDU_ASSERT(!peer_closed, status);

    if (HYD_pmcd_pmip.user_global.debug)
        HYDU_dump(stdout, "PMI response: %s\n", cmd);

    status = HYDU_sock_write(fd, cmd, strlen(cmd), &nsent, &peer_closed,
                             HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    HYDU_ASSERT(!peer_closed, status);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #8
0
/*
 * Concatenate all strings of the NULL-terminated array strlist into one
 * buffer obtained through HYD_MALLOC and store that buffer in *strjoin.
 *
 * The buffer is sized to the sum of the string lengths plus one byte for the
 * terminating NUL.  An empty list yields an empty string.
 * Returns the status left by HYD_MALLOC (HYD_SUCCESS when it does not fail).
 */
HYD_status HYD_str_alloc_and_join(char **strlist, char **strjoin)
{
    int total = 0, offset = 0;
    char **cur;
    HYD_status status = HYD_SUCCESS;

    HYD_FUNC_ENTER();

    /* first pass: how many characters are needed */
    for (cur = strlist; *cur != NULL; cur++)
        total += strlen(*cur);

    HYD_MALLOC(*strjoin, char *, total + 1, status);
    (*strjoin)[0] = 0;

    /* second pass: append every piece at the running offset */
    for (cur = strlist; *cur != NULL; cur++) {
        MPL_snprintf(*strjoin + offset, total - offset + 1, "%s", *cur);
        offset += strlen(*cur);
    }

  fn_exit:
    HYD_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #9
0
/*
 * Write a multi-line description of the library into `version` and store the
 * length of that text (strlen, i.e. without the terminating NUL) in
 * *resultlen.
 *
 * The text consists of eight "MPICH <item>:\t<value>\n" lines built from the
 * MPII_Version_* strings (version, release date, device, configure line, CC,
 * CXX, F77, FC).  MPI_MAX_LIBRARY_VERSION_STRING is passed to MPL_snprintf
 * as the size limit for `version`.
 *
 * Returns mpi_errno, which is MPI_SUCCESS unless the error path below is
 * taken (only compiled when HAVE_ERROR_CHECKING is defined).
 */
int MPI_Get_library_version(char *version, int *resultlen)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GET_LIBRARY_VERSION);

    /* Note that this routine may be called before MPI_Init */

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GET_LIBRARY_VERSION);

    /* Validate parameters and objects (post conversion) */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            /* NOTE(review): these macros presumably jump to fn_fail when an
             * argument is NULL - confirm against their definitions */
            MPIR_ERRTEST_ARGNULL(version, "version", mpi_errno);
            MPIR_ERRTEST_ARGNULL(resultlen, "resultlen", mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    /* Adjacent string literals form one format string with eight %s fields,
     * matched in order by the eight MPII_Version_* arguments. */
    MPL_snprintf(version, MPI_MAX_LIBRARY_VERSION_STRING,
                 "MPICH Version:\t%s\n"
                 "MPICH Release date:\t%s\n"
                 "MPICH Device:\t%s\n"
                 "MPICH configure:\t%s\n"
                 "MPICH CC:\t%s\n"
                 "MPICH CXX:\t%s\n"
                 "MPICH F77:\t%s\n"
                 "MPICH FC:\t%s\n",
                 MPII_Version_string, MPII_Version_date, MPII_Version_device,
                 MPII_Version_configure, MPII_Version_CC, MPII_Version_CXX,
                 MPII_Version_F77, MPII_Version_FC);

    *resultlen = (int)strlen(version);

    /* ... end of body of routine ... */

    /* The fn_exit label exists only when the error path that jumps back to
     * it is compiled in. */
#ifdef HAVE_ERROR_CHECKING
  fn_exit:
#endif
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GET_LIBRARY_VERSION);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
#ifdef HAVE_ERROR_CHECKING
  fn_fail:
    {
        /* wrap the incoming code in a routine-specific error code that
         * records both argument pointers */
        mpi_errno =
            MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
                                 MPI_ERR_OTHER, "**mpi_get_library_version",
                                 "**mpi_get_library_version %p %p", version, resultlen);
    }
    mpi_errno = MPIR_Err_return_comm(0, FCNAME, mpi_errno);
    goto fn_exit;
#endif
    /* --END ERROR HANDLING-- */
}
Пример #10
0
/* Copy the string that *argv currently points at into
 * HYD_pmcd_pmip.local.kvs->kvsname (limit PMI_MAXKVSLEN), then advance *argv
 * past that string.  `arg` is not used.  Always returns HYD_SUCCESS. */
static HYD_status pmi_kvsname_fn(char *arg, char ***argv)
{
    const char *name = **argv;

    MPL_snprintf(HYD_pmcd_pmip.local.kvs->kvsname, PMI_MAXKVSLEN, "%s", name);
    *argv += 1;

    return HYD_SUCCESS;
}
Пример #11
0
/*
 * Format a human-readable breakdown of context_id into out_str (at most len
 * bytes are passed as the limit to MPL_snprintf).
 *
 * The text shows the id in decimal and hex followed by its DYNAMIC_PROC,
 * PREFIX, IS_LOCALCOMM, SUBCOMM and SUFFIX fields as read with
 * MPIR_CONTEXT_READ_FIELD.  SUBCOMM values 0/1/2 are printed as
 * "parent"/"intranode"/"internode"; a non-zero SUFFIX is printed as "coll",
 * zero as "pt2pt".
 */
static void dump_context_id(MPIR_Context_id_t context_id, char *out_str, int len)
{
    int subcomm_type = MPIR_CONTEXT_READ_FIELD(SUBCOMM, context_id);
    const char *subcomm_type_name = NULL;

    switch (subcomm_type) {
    case 0:
        subcomm_type_name = "parent";
        break;
    case 1:
        subcomm_type_name = "intranode";
        break;
    case 2:
        subcomm_type_name = "internode";
        break;
    default:
        MPIR_Assert(FALSE);
        /* If the assertion does not stop execution (presumably possible
         * when assertions are compiled out), never hand a NULL pointer to
         * the %s conversion below. */
        subcomm_type_name = "unknown";
        break;
    }
    MPL_snprintf(out_str, len,
                  "context_id=%d (%#x): DYNAMIC_PROC=%d PREFIX=%#x IS_LOCALCOMM=%d SUBCOMM=%s SUFFIX=%s",
                  context_id,
                  context_id,
                  MPIR_CONTEXT_READ_FIELD(DYNAMIC_PROC, context_id),
                  MPIR_CONTEXT_READ_FIELD(PREFIX, context_id),
                  MPIR_CONTEXT_READ_FIELD(IS_LOCALCOMM, context_id),
                  subcomm_type_name,
                  (MPIR_CONTEXT_READ_FIELD(SUFFIX, context_id) ? "coll" : "pt2pt"));
}
Пример #12
0
/*
 * Write one environment file per rank.
 *
 * For every entry of ranks[0..num_ranks-1] the file
 * "/tmp/hydra-env-file-<pid>:<rank>" is (re)created, where <pid> is this
 * process' pid, and every element of envlist is written to it as a
 * "NAME=VALUE" line.
 *
 * Returns HYD_SUCCESS, or the status set by HYDU_ERR_CHKANDJUMP when fopen
 * or fclose fails.
 */
static HYD_status create_env_file(const struct HYD_env *envlist, int num_ranks, int *ranks)
{
    HYD_status status = HYD_SUCCESS;
    char path[256];
    int idx;

    HYDU_FUNC_ENTER();

    for (idx = 0; idx < num_ranks; ++idx) {
        FILE *fp;
        const struct HYD_env *env;
        int rc;

        MPL_snprintf(path, sizeof(path), "/tmp/hydra-env-file-%d:%d", (int) getpid(),
                     ranks[idx]);

        fp = fopen(path, "w");
        HYDU_ERR_CHKANDJUMP(status, fp == NULL, HYD_INTERNAL_ERROR, "fopen failed: %s\n",
                            strerror(errno));

        /* dump the whole list, one variable per line */
        env = envlist;
        while (env) {
            fprintf(fp, "%s=%s\n", env->env_name, env->env_value);
            env = env->next;
        }

        rc = fclose(fp);
        HYDU_ERR_CHKANDJUMP(status, rc, HYD_INTERNAL_ERROR, "fclose failed: %s\n",
                            strerror(errno));
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #13
0
/*
 * Store the pair (key, value) in the key-value space kvsname.
 *
 * Normal case: a "cmd=put ..." line is built and handed to GetResponse,
 * which is asked for a "put_result" reply; its result is returned.
 *
 * Singleton case (PMI_initialized == SINGLETON_INIT_BUT_NO_PM): the pair is
 * only remembered in cached_singinit_key / cached_singinit_val.  A single
 * pair can be cached; a second put, or a failing copy, returns PMI_FAIL.
 */
int PMI_KVS_Put( const char kvsname[], const char key[], const char value[] )
{
    char cmdline[PMIU_MAXLINE];

    /* This is a special hack to support singleton initialization */
    if (PMI_initialized == SINGLETON_INIT_BUT_NO_PM) {
        if (cached_singinit_inuse)
            return PMI_FAIL;
        if (MPL_strncpy(cached_singinit_key, key, PMI_keylen_max) != 0)
            return PMI_FAIL;
        if (MPL_strncpy(cached_singinit_val, value, PMI_vallen_max) != 0)
            return PMI_FAIL;
        cached_singinit_inuse = 1;
        return PMI_SUCCESS;
    }

    if (MPL_snprintf(cmdline, PMIU_MAXLINE,
                     "cmd=put kvsname=%s key=%s value=%s\n",
                     kvsname, key, value) < 0)
        return PMI_FAIL;

    return GetResponse(cmdline, "put_result", 1);
}
Пример #14
0
/*
 * Dispatch error_code to the error handler associated with mpi_fh and
 * return error_code to the caller.
 *
 * The handler is ADIOI_DFLT_ERR_HANDLER for MPI_FILE_NULL, otherwise the
 * err_handler stored in the resolved ADIO file.  Depending on the handler
 * kind the function aborts (fatal), calls the C handler (kind 2), calls the
 * C++ handler wrapper (kind 3), or simply returns (kind 1).
 */
int MPIO_Err_return_file(MPI_File mpi_fh, int error_code)
{
    MPI_Errhandler e;
    void (*c_errhandler) (MPI_File *, int *, ...);
    int kind;                   /* Error handler kind (see below) */
    char error_msg[4096];
    int len;

    /* If the file pointer is not valid, we use the handler on
     * MPI_FILE_NULL (MPI-2, section 9.7).  For now, this code assumes that
     * MPI_FILE_NULL has the default handler (return).  FIXME.  See
     * below - the set error handler uses ADIOI_DFLT_ERR_HANDLER;
     */

    /* First, get the handler and the corresponding function */
    if (mpi_fh == MPI_FILE_NULL) {
        e = ADIOI_DFLT_ERR_HANDLER;
    } else {
        ADIO_File fh;

        fh = MPIO_File_resolve(mpi_fh);
        e = fh->err_handler;
    }

    /* Actually, e is just the value provided by the MPICH routine
     * file_set_errhandler.  This is actually a *pointer* to the
     * errhandler structure.  We don't know that, so we ask
     * the MPICH code to translate this object into an error handler.
     * kind = 0: errors are fatal
     * kind = 1: errors return
     * kind = 2: errors call function
     * (kind = 3 is additionally handled below: the handler is invoked
     * through MPIR_File_call_cxx_errhandler)
     */
    if (e == MPI_ERRORS_RETURN || e == MPIR_ERRORS_THROW_EXCEPTIONS || !e) {
        /* FIXME: This is a hack in case no error handler was set */
        kind = 1;
        c_errhandler = 0;
    } else {
        MPIR_Get_file_error_routine(e, &c_errhandler, &kind);
    }

    /* --BEGIN ERROR HANDLING-- */
    if (MPIR_Err_is_fatal(error_code) || kind == 0) {
        /* NOTE(review): this path is also reachable with
         * mpi_fh == MPI_FILE_NULL; what MPIO_File_resolve returns for that
         * handle is not visible here - confirm fh can be dereferenced. */
        ADIO_File fh = MPIO_File_resolve(mpi_fh);

        /* Build "I/O error: <text for error_code>" in error_msg: the text
         * is appended right after the fixed prefix. */
        MPL_snprintf(error_msg, 4096, "I/O error: ");
        len = (int) strlen(error_msg);
        MPIR_Err_get_string(error_code, &error_msg[len], 4096 - len, NULL);
        MPIR_Abort(fh->comm, MPI_SUCCESS, error_code, error_msg);
    }
    /* --END ERROR HANDLING-- */
    else if (kind == 2) {
        (*c_errhandler) (&mpi_fh, &error_code, 0);
    } else if (kind == 3) {
        MPIR_File_call_cxx_errhandler(&mpi_fh, &error_code, c_errhandler);
    }

    /* kind == 1 just returns */
    return error_code;
}
Пример #15
0
/* Publish event_name with a payload that identifies the calling process:
 * "[id: {<pg id>:{<rank>}}]", built from MPIDI_Process.my_pg->id and
 * MPIDI_Process.my_pg_rank. */
void MPIDU_Ftb_publish_me(const char *event_name)
{
    char payload[FTB_MAX_PAYLOAD_DATA] = "";

    MPL_snprintf(payload, sizeof(payload), "[id: {%s:{%d}}]",
                 (char *) MPIDI_Process.my_pg->id,
                 MPIDI_Process.my_pg_rank);

    MPIDU_Ftb_publish(event_name, payload);
}
Пример #16
0
/*
 * Initialization handler used when the connection was established through a
 * port instead of an fd, so that nothing was passed through environment
 * variables.
 *
 * Writes four lines to pentry->fd, in this order: "cmd=initack", then
 * "cmd=set size=<n>", "cmd=set rank=<r>" and "cmd=set debug=<d>", taken
 * from the process group size, the process' wRank and pmidebug.
 * Always returns 0.
 */
static int fPMI_Handle_init_port(PMIProcess * pentry)
{
    char line[PMIU_MAXLINE];

    DBG_PRINTFCOND(pmidebug, ("Entering fPMI_Handle_init_port to start connection\n"));

    /* simple_pmi wants to see cmd=initack after the initack request before
     * the other data */
    PMIWriteLine(pentry->fd, "cmd=initack\n");

    MPL_snprintf(line, sizeof(line), "cmd=set size=%d\n", pentry->group->nProcess);
    PMIWriteLine(pentry->fd, line);

    MPL_snprintf(line, sizeof(line), "cmd=set rank=%d\n", pentry->pState->wRank);
    PMIWriteLine(pentry->fd, line);

    MPL_snprintf(line, sizeof(line), "cmd=set debug=%d\n", pmidebug);
    PMIWriteLine(pentry->fd, line);

    return 0;
}
Пример #17
0
/* Obtain a color from random_color() and format its three components as
 * "%3d %3d %3d" into the array random_color_str (declared outside this
 * function; its full size is used as the limit).  The returned pointer is
 * that same array, so each call overwrites the previous result. */
static char *get_random_color_str(void)
{
    unsigned char red, green, blue;

    random_color(&red, &green, &blue);
    MPL_snprintf(random_color_str, sizeof(random_color_str), "%3d %3d %3d",
                 (int) red, (int) green, (int) blue);

    return random_color_str;
}
Пример #18
0
/* Handle an incoming get_appnum command: reply on pentry->fd with
 * "cmd=appnum appnum=<n>", where <n> is myAppNum of the application the
 * process belongs to.  The reply is also passed to the debug print macro.
 * Always returns 0. */
static int fPMI_Handle_get_appnum(PMIProcess * pentry)
{
    char reply[PMIU_MAXLINE];

    MPL_snprintf(reply, sizeof(reply), "cmd=appnum appnum=%d\n",
                 pentry->pState->app->myAppNum);
    PMIWriteLine(pentry->fd, reply);
    DBG_PRINTFCOND(pmidebug, ("%s", reply));

    return 0;
}
Пример #19
0
/* Publish event_name with a payload that identifies the process behind vc:
 * "[id: {<pg id>:{<rank>}}]".  When vc is NULL or has no process group
 * (pg can be null for temporary VCs used with dynamic processes) the event
 * is published with an empty payload. */
void MPIDU_Ftb_publish_vc(const char *event_name, struct MPIDI_VC *vc)
{
    char payload[FTB_MAX_PAYLOAD_DATA] = "";

    if (vc != NULL && vc->pg != NULL) {
        MPL_snprintf(payload, sizeof(payload), "[id: {%s:{%d}}]",
                     (char *) vc->pg->id, vc->pg_rank);
    }

    MPIDU_Ftb_publish(event_name, payload);
}
Пример #20
0
/* Handle an incoming get_universe_size command: reply on pentry->fd with
 * "cmd=universe_size size=<n>", where <n> is pUniv.size.  The reply is also
 * passed to the debug print macro.  Always returns 0. */
static int fPMI_Handle_get_universe_size(PMIProcess * pentry)
{
    char reply[PMIU_MAXLINE];

    /* the universe size comes from the process structures */
    MPL_snprintf(reply, sizeof(reply), "cmd=universe_size size=%d\n", pUniv.size);
    PMIWriteLine(pentry->fd, reply);
    DBG_PRINTFCOND(pmidebug, ("%s", reply));

    return 0;
}
Пример #21
0
/*
 * Build the list of aggregator ranks for fd from its cb_config_list hint.
 *
 * Every caller first gathers the processor-name array (cached on both
 * orig_comm and comm).  Rank 0 then parses the hint into a rank list, stores
 * a right-sized copy in fd->hints->ranklist, updates fd->hints->cb_nodes and
 * the "cb_nodes" info key.  Afterwards ADIOI_cb_bcast_rank_map(fd) is called
 * and a resulting cb_nodes <= 0 is reported through *error_code.
 *
 * Always returns 0; errors are reported only through *error_code.
 */
static int build_cb_config_list(ADIO_File fd,
                                MPI_Comm orig_comm, MPI_Comm comm,
                                int rank, int procs, int *error_code)
{
    static char myname[] = "ADIO_OPEN cb_config_list";
    ADIO_cb_name_array names;
    int *scratch;
    int n_ranks;
    char *text;

    /* Gather the processor name array if we don't already have it.  This
     * has to happen early in ADIO_Open so the array can be cached in both
     * the dup'd communicator and the original one. */
    ADIOI_cb_gather_name_array(orig_comm, comm, &names);

    /* parse the cb_config_list and create a rank map on rank 0 */
    if (rank == 0) {
        scratch = (int *) ADIOI_Malloc(sizeof(int) * procs);
        if (scratch == NULL) {
            *error_code = MPIO_Err_create_code(*error_code,
                                               MPIR_ERR_RECOVERABLE,
                                               myname,
                                               __LINE__,
                                               MPI_ERR_OTHER,
                                               "**nomem2",0);
            return 0;
        }

        n_ranks = ADIOI_cb_config_list_parse(fd->hints->cb_config_list,
                                             names, scratch,
                                             fd->hints->cb_nodes);

        /* keep only as many entries as were actually produced */
        if (n_ranks > 0) {
            fd->hints->ranklist = (int *) ADIOI_Malloc(sizeof(int) * n_ranks);
            memcpy(fd->hints->ranklist, scratch, sizeof(int) * n_ranks);
        }
        ADIOI_Free(scratch);
        fd->hints->cb_nodes = n_ranks;

        /* TEMPORARY -- REMOVE WHEN NO LONGER UPDATING INFO FOR FS-INDEP. */
        text = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
        MPL_snprintf(text, MPI_MAX_INFO_VAL+1, "%d", n_ranks);
        ADIOI_Info_set(fd->info, "cb_nodes", text);
        ADIOI_Free(text);
    }

    ADIOI_cb_bcast_rank_map(fd);
    if (fd->hints->cb_nodes <= 0) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**ioagnomatch", 0);
        /* only the local copy of the handle is changed here; the caller's
         * variable is not affected */
        fd = ADIO_FILE_NULL;
    }
    return 0;
}
Пример #22
0
/*
 * Append this process' TCP connection information to the business card
 * string at *bc_val_p (remaining capacity tracked in *val_max_sz_p).
 *
 * Added, in order:
 *   - the host description obtained from GetSockInterfaceAddr
 *     (key MPIDI_CH3I_HOST_DESCRIPTION_KEY);
 *   - the port of the listening socket MPID_nem_tcp_g_lstn_sc.fd, converted
 *     with ntohs (key MPIDI_CH3I_PORT_KEY);
 *   - when the interface address is a non-empty AF_INET address, its first
 *     four bytes in dotted-decimal form (key MPIDI_CH3I_IFNAME_KEY).
 *
 * Returns MPI_SUCCESS or an MPI error code ("**buscard_len" when the string
 * helper reports MPIU_STR_NOMEM, "**buscard" for its other failures).
 */
int MPID_nem_tcp_get_business_card (int my_rank, char **bc_val_p, int *val_max_sz_p)
{
    int mpi_errno = MPI_SUCCESS;
    int str_errno = MPIU_STR_SUCCESS;
    MPIDU_Sock_ifaddr_t iface;
    char text[MAX_HOST_DESCRIPTION_LEN];
    int rc;
    struct sockaddr_in lstn_addr;
    socklen_t addr_len;
    unsigned char *octets;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_GET_BUSINESS_CARD);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_GET_BUSINESS_CARD);

    /* host description */
    mpi_errno = GetSockInterfaceAddr(my_rank, text, sizeof(text), &iface);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    str_errno = MPIU_Str_add_string_arg(bc_val_p, val_max_sz_p, MPIDI_CH3I_HOST_DESCRIPTION_KEY, text);
    if (str_errno) {
        MPIR_ERR_CHKANDJUMP(str_errno == MPIU_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len");
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
    }

    /* listening port */
    addr_len = sizeof(lstn_addr);
    rc = getsockname (MPID_nem_tcp_g_lstn_sc.fd, (struct sockaddr *)&lstn_addr, &addr_len);
    MPIR_ERR_CHKANDJUMP1 (rc == -1, mpi_errno, MPI_ERR_OTHER, "**getsockname", "**getsockname %s", MPIU_Strerror (errno));

    str_errno = MPIU_Str_add_int_arg (bc_val_p, val_max_sz_p, MPIDI_CH3I_PORT_KEY, ntohs(lstn_addr.sin_port));
    if (str_errno) {
        MPIR_ERR_CHKANDJUMP(str_errno == MPIU_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len");
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
    }

    /* the numeric address is optional: only for a non-empty AF_INET entry */
    if (!(iface.len > 0 && iface.type == AF_INET))
        goto fn_exit;

    octets = (unsigned char *)(iface.ifaddr);
    MPL_snprintf( text, sizeof(text), "%u.%u.%u.%u", octets[0], octets[1], octets[2], octets[3] );
    MPIU_DBG_MSG_S(CH3_CONNECT,VERBOSE,"ifname = %s",text );
    str_errno = MPIU_Str_add_string_arg(bc_val_p, val_max_sz_p, MPIDI_CH3I_IFNAME_KEY, text);
    if (str_errno) {
        MPIR_ERR_CHKANDJUMP(str_errno == MPIU_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len");
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_GET_BUSINESS_CARD);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Пример #23
0
/* Handle an incoming "get_maxes" command: reply on pentry->fd with
 * "cmd=maxes kvsname_max=<a> keylen_max=<b> vallen_max=<c>", using the
 * compile-time limits MAXKVSNAME, MAXKEYLEN and MAXVALLEN.  The reply is
 * also passed to the debug print macro.  Always returns 0. */
static int fPMI_Handle_get_maxes(PMIProcess * pentry)
{
    char reply[PMIU_MAXLINE];

    MPL_snprintf(reply, sizeof(reply),
                 "cmd=maxes kvsname_max=%d keylen_max=%d vallen_max=%d\n",
                 MAXKVSNAME, MAXKEYLEN, MAXVALLEN);
    PMIWriteLine(pentry->fd, reply);
    DBG_PRINTFCOND(pmidebug, ("%s", reply));

    return 0;
}
Пример #24
0
/*
 * Handle an incoming "destroy_kvs" command.
 *
 * Fetches the "kvsname" field with PMIU_getval, frees the named key-value
 * space if it exists and writes one reply line of the form
 *     cmd=kvs_destroyed rc=<0|-1> msg=<text>
 * to pentry->fd.  rc is -1 when no space with that name was found.
 *
 * The not-found text uses the same blank-free "kvs_<name>_not_found" form as
 * the put and getbyidx handlers, so that msg stays a single key=value token
 * (NOTE(review): presumably the client splits the line on blanks - confirm
 * against the client-side parser).
 *
 * The function itself always returns 0.
 */
static int fPMI_Handle_destroy_kvs(PMIProcess * pentry)
{
    int rc = 0;
    PMIKVSpace *kvs;
    char kvsname[MAXKVSNAME];
    char message[PMIU_MAXLINE], outbuf[PMIU_MAXLINE];

    PMIU_getval("kvsname", kvsname, MAXKVSNAME);
    kvs = fPMIKVSFindSpace(kvsname);
    if (kvs) {
        PMIKVSFree(kvs);
        MPL_snprintf(message, PMIU_MAXLINE, "KVS_%s_successfully_destroyed", kvsname);
    } else {
        MPL_snprintf(message, PMIU_MAXLINE, "kvs_%s_not_found", kvsname);
        rc = -1;
    }
    MPL_snprintf(outbuf, PMIU_MAXLINE, "cmd=kvs_destroyed rc=%d msg=%s\n", rc, message);
    PMIWriteLine(pentry->fd, outbuf);
    return 0;
}
Пример #25
0
/* ------------------------------------------------------------------------- */
/* Handle a finalize request: mark the process state as PROCESS_FINALIZED
 * and write "cmd=finalize_ack" to pentry->fd, which releases the process.
 * Always returns 0. */
static int fPMI_Handle_finalize(PMIProcess * pentry)
{
    pentry->pState->status = PROCESS_FINALIZED;

    /* The acknowledgement is a fixed line without any conversions, so it
     * can be written directly. */
    PMIWriteLine(pentry->fd, "cmd=finalize_ack\n");

    return 0;
}
Пример #26
0
/*
 * Handle an incoming "getbyidx" command.
 *
 * Fetches "kvsname" and "idx", walks the pair list of the named key-value
 * space to the idx-th element and replies on pentry->fd with either
 *     cmd=getbyidx_results rc=0 nextidx=<idx+1> key=<k> val=<v>
 * or an rc=-1 line giving the reason (no more pairs / space not found).
 *
 * The position of the last successful lookup is remembered in
 * kvs->lastIdx / kvs->lastByIdx so that a later request for the same or a
 * larger index can continue from there instead of from the list head; a
 * failed lookup resets that memory.
 *
 * Returns -1 when the space is not found and 0 otherwise (including the
 * "no more pairs" case).
 */
static int fPMI_Handle_getbyidx(PMIProcess * pentry)
{
    int idx, steps, rc = 0;
    PMIKVSpace *kvs;
    char kvsname[MAXKVSNAME], idx_str[8], reply[PMIU_MAXLINE];
    PMIKVPair *pair;

    PMIU_getval("kvsname", kvsname, MAXKVSNAME);
    kvs = fPMIKVSFindSpace(kvsname);
    if (!kvs) {
        rc = -1;
        MPL_snprintf(reply, PMIU_MAXLINE, "cmd=getbyidx_results rc=-1 "
                     "reason=kvs_%s_not_found\n", kvsname);
    } else {
        PMIU_getval("idx", idx_str, sizeof(idx_str));
        idx = atoi(idx_str);

        /* choose the starting point: the remembered pair if it is not past
         * the requested index, otherwise the head of the list */
        if (kvs->lastIdx >= 0 && idx >= kvs->lastIdx) {
            pair = kvs->lastByIdx;
            steps = idx - kvs->lastIdx;
        } else {
            pair = kvs->pairs;
            steps = idx;
        }
        while (steps-- > 0 && pair)
            pair = pair->nextPair;

        if (pair) {
            MPL_snprintf(reply, PMIU_MAXLINE, "cmd=getbyidx_results "
                         "rc=0 nextidx=%d key=%s val=%s\n", idx + 1, pair->key, pair->val);
            kvs->lastIdx = idx;
            kvs->lastByIdx = pair;
        } else {
            MPL_snprintf(reply, PMIU_MAXLINE, "cmd=getbyidx_results rc=-1 "
                         "reason=no_more_keyvals\n");
            kvs->lastIdx = -1;
            kvs->lastByIdx = 0;
        }
    }

    PMIWriteLine(pentry->fd, reply);
    DBG_PRINTFCOND(pmidebug, ("%s", reply));
    return rc;
}
Пример #27
0
/* Ask the process manager to abort the job.
 *
 * A "cmd=abort exitcode=<exit_code>" line is handed to GetResponse;
 * error_msg is only printed locally through PMIU_printf and is not part of
 * the command.  GetResponse is not expected to return, so reaching the end
 * of the function is reported as PMI_FAIL. */
int PMI_Abort(int exit_code, const char error_msg[])
{
    char cmdline[PMIU_MAXLINE];

    /* the exit code travels inside the abort command */
    MPL_snprintf(cmdline, sizeof(cmdline), "cmd=abort exitcode=%d\n", exit_code);

    PMIU_printf(PMI_debug, "aborting job:\n%s\n", error_msg);
    GetResponse(cmdline, "", 0);

    return PMI_FAIL;
}
Пример #28
0
/*
 * Append the pair (key, val) to the end of the pair list of kvs.
 *
 * key and val are copied into a freshly allocated node, limited to
 * PMI_MAXKEYLEN and PMI_MAXVALLEN respectively.  If a node with the same
 * (copied) key already exists, the new node is freed, *ret is set to -1 and
 * the list is left unchanged; otherwise *ret is 0.
 *
 * The returned status is whatever HYDU_MALLOC leaves in it; a duplicate key
 * is reported only through *ret.
 */
HYD_status HYD_pmcd_pmi_add_kvs(const char *key, char *val, struct HYD_pmcd_pmi_kvs *kvs, int *ret)
{
    struct HYD_pmcd_pmi_kvs_pair *new_pair;
    struct HYD_pmcd_pmi_kvs_pair **link;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYDU_MALLOC(new_pair, struct HYD_pmcd_pmi_kvs_pair *, sizeof(struct HYD_pmcd_pmi_kvs_pair),
                status);
    MPL_snprintf(new_pair->key, PMI_MAXKEYLEN, "%s", key);
    MPL_snprintf(new_pair->val, PMI_MAXVALLEN, "%s", val);
    new_pair->next = NULL;

    *ret = 0;

    /* Walk the chain of "next" links, starting with the list head itself.
     * The loop ends at the link that is still NULL, which is where the new
     * node belongs (for an empty list that is the head). */
    for (link = &kvs->key_pair; *link != NULL; link = &(*link)->next) {
        if (strcmp((*link)->key, new_pair->key) == 0) {
            /* duplicate key found */
            *ret = -1;
            goto fn_fail;
        }
    }
    *link = new_pair;

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    HYDU_FREE(new_pair);
    goto fn_exit;
}
Пример #29
0
/*
 * Open a listening socket, register it with the demux engine and build the
 * "<host>:<port>" string that peers use to connect to it.
 *
 * iface      - if non-NULL, the host part is the IP of this interface;
 * hostname   - otherwise, if non-NULL, used verbatim as the host part;
 *              if both are NULL the result of gethostname() is used;
 * port_range - optional range passed (as a private copy) to HYDU_sock_listen;
 * port_str   - receives a newly allocated string owned by the caller;
 * callback, userp - registered for HYD_POLLIN on the listening socket.
 *
 * All temporary strings (the port-range copy, the host string and the
 * textual port) are released on every exit path.
 * Returns HYD_SUCCESS or the status of the first failing step.
 */
HYD_status
HYDU_sock_create_and_listen_portstr(char *iface, char *hostname, char *port_range,
                                    char **port_str,
                                    HYD_status(*callback) (int fd, HYD_event_t events,
                                                           void *userp), void *userp)
{
    int listenfd;
    char *sport = NULL, *real_port_range = NULL, *ip = NULL;
    uint16_t port;
    HYD_status status = HYD_SUCCESS;

    /* Listen on a port in the port range.  The callee gets a private copy
     * of the range string.
     * NOTE(review): the copy is freed in fn_exit on the assumption that
     * HYDU_sock_listen neither keeps nor frees it - confirm. */
    port = 0;
    real_port_range = port_range ? MPL_strdup(port_range) : NULL;
    status = HYDU_sock_listen(&listenfd, real_port_range, &port);
    HYDU_ERR_POP(status, "unable to listen on port\n");

    /* Register the listening socket with the demux engine */
    status = HYDT_dmx_register_fd(1, &listenfd, HYD_POLLIN, userp, callback);
    HYDU_ERR_POP(status, "unable to register fd\n");

    /* Create a port string for MPI processes to use to connect to */
    if (iface) {
        status = HYDU_sock_get_iface_ip(iface, &ip);
        HYDU_ERR_POP(status, "unable to get network interface IP\n");
    }
    else if (hostname) {
        ip = MPL_strdup(hostname);
    }
    else {
        char localhost[MAX_HOSTNAME_LEN] = { 0 };

        if (gethostname(localhost, MAX_HOSTNAME_LEN) < 0)
            HYDU_ERR_SETANDJUMP(status, HYD_SOCK_ERROR, "unable to get local hostname\n");

        ip = MPL_strdup(localhost);
    }

    sport = HYDU_int_to_str(port);
    /* room for "<ip>" + ':' + "<port>" + NUL */
    HYDU_MALLOC_OR_JUMP(*port_str, char *, strlen(ip) + 1 + strlen(sport) + 1, status);
    MPL_snprintf(*port_str, strlen(ip) + 1 + strlen(sport) + 1, "%s:%s", ip, sport);

  fn_exit:
    /* Single cleanup point: each pointer starts out NULL, so only what was
     * actually allocated is released, on success and on failure alike. */
    if (sport)
        MPL_free(sport);
    if (real_port_range)
        MPL_free(real_port_range);
    if (ip)
        MPL_free(ip);
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #30
0
/*
 * Print an abort message and terminate the job.
 *
 * The message has the form
 *     Abort(<exit_code>)[ on node <world rank>][ (rank <r> in comm <ctx>)]: <error_msg>[ (<text of mpi_errno>)]
 * where the optional parts are present when comm_world exists, when comm is
 * non-NULL and when mpi_errno is not MPI_SUCCESS.  A NULL error_msg is
 * replaced by "Internal error".
 *
 * After flushing stderr/stdout the process exits directly when comm is NULL
 * or a single-process intracommunicator.  Otherwise the abort is forwarded:
 * through MPIDIG_comm_abort for communicators other than comm_world, and
 * through the configured PMI abort call for comm_world.
 *
 * Returns 0 if any of those calls returns.
 */
int MPID_Abort(MPIR_Comm * comm, int mpi_errno, int exit_code, const char *error_msg)
{
    char sys_str[MPI_MAX_ERROR_STRING + 5] = "";
    char comm_str[MPI_MAX_ERROR_STRING] = "";
    char world_str[MPI_MAX_ERROR_STRING] = "";
    char error_str[2 * MPI_MAX_ERROR_STRING + 128];
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_ABORT);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_ABORT);

    if (MPIR_Process.comm_world) {
        int rank = MPIR_Process.comm_world->rank;
        snprintf(world_str, sizeof(world_str), " on node %d", rank);
    }

    if (comm) {
        int rank = comm->rank;
        int context_id = comm->context_id;
        snprintf(comm_str, sizeof(comm_str), " (rank %d in comm %d)", rank, context_id);
    }

    if (!error_msg)
        error_msg = "Internal error";

    if (mpi_errno != MPI_SUCCESS) {
        char msg[MPI_MAX_ERROR_STRING] = "";
        MPIR_Err_get_string(mpi_errno, msg, MPI_MAX_ERROR_STRING, NULL);
        /* Bound the write by the destination buffer: sys_str is larger than
         * msg precisely so that " (" and ")" fit around the full text. */
        snprintf(sys_str, sizeof(sys_str), " (%s)", msg);
    }
    MPL_snprintf(error_str, sizeof(error_str), "Abort(%d)%s%s: %s%s\n",
                 exit_code, world_str, comm_str, error_msg, sys_str);
    MPL_error_printf("%s", error_str);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_ABORT);
    fflush(stderr);
    fflush(stdout);
    if (NULL == comm || (MPIR_Comm_size(comm) == 1 && comm->comm_kind == MPIR_COMM_KIND__INTRACOMM))
        MPL_exit(exit_code);

    if (comm != MPIR_Process.comm_world) {
        MPIDIG_comm_abort(comm, exit_code);
    } else {
        /* exactly one of the PMI flavours is compiled in */
#ifdef USE_PMIX_API
        PMIx_Abort(exit_code, error_msg, NULL, 0);
#elif defined(USE_PMI2_API)
        PMI2_Abort(TRUE, error_msg);
#else
        PMI_Abort(exit_code, error_msg);
#endif
    }
    return 0;
}