Ejemplo n.º 1
0
int test_connect_disconnect(char *my_nspace, int my_rank)
{
    int rc;
    pmix_proc_t proc;
    char nspace[PMIX_MAX_NSLEN+1];
    pmix_rank_t newrank;
    cd_cbdata cbdata;

    (void)strncpy(proc.nspace, my_nspace, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;

    rc = PMIx_Connect(&proc, 1, NULL, 0, nspace, &newrank);
    if (PMIX_SUCCESS != rc) {
        TEST_ERROR(("%s:%d: Connect blocking test failed.", my_nspace, my_rank));
        return PMIX_ERROR;
    }
    TEST_VERBOSE(("%s:%d: Connect blocking test succeded to nspace %s.", my_nspace, my_rank, nspace));

    rc = PMIx_Disconnect(nspace, NULL, 0);
    if (PMIX_SUCCESS != rc) {
        TEST_ERROR(("%s:%d: Disconnect blocking test failed.", my_nspace, my_rank));
        return PMIX_ERROR;
    }
    TEST_VERBOSE(("%s:%d: Disconnect blocking test succeded.", my_nspace, my_rank));

    cbdata.in_progress = 1;
    rc = PMIx_Connect_nb(&proc, 1, NULL, 0, cnct_cb, &cbdata);
    if (PMIX_SUCCESS == rc) {
        PMIX_WAIT_FOR_COMPLETION(cbdata.in_progress);
        rc = cbdata.status;
    }
    if (PMIX_SUCCESS != rc) {
        TEST_ERROR(("%s:%d: Connect non-blocking test failed.", my_nspace, my_rank));
        return PMIX_ERROR;
    }
    TEST_VERBOSE(("%s:%d: Connect non-blocking test succeded.", my_nspace, my_rank));

    cbdata.in_progress = 1;
    rc = PMIx_Disconnect_nb(nspace, NULL, 0, cd_cb, &cbdata);
    if (PMIX_SUCCESS == rc) {
        PMIX_WAIT_FOR_COMPLETION(cbdata.in_progress);
        rc = cbdata.status;
    }
    if (PMIX_SUCCESS != rc) {
        TEST_ERROR(("%s:%d: Disconnect non-blocking test failed.", my_nspace, my_rank));
        return PMIX_ERROR;
    }
    TEST_VERBOSE(("%s:%d: Disconnect non-blocking test succeded.", my_nspace, my_rank));
    return PMIX_SUCCESS;
}
Ejemplo n.º 2
0
int test_cd_common(pmix_proc_t *procs, size_t nprocs, int blocking, int disconnect)
{
    int rc;
    if (blocking) {
        if (!disconnect) {
            rc = PMIx_Connect(procs, nprocs, NULL, 0);
        } else {
            rc = PMIx_Disconnect(procs, nprocs, NULL, 0);
        }
    } else {
        cd_cbdata cbdata;
        cbdata.in_progress = 1;
        if (!disconnect) {
            rc = PMIx_Connect_nb(procs, nprocs, NULL, 0, cd_cb, (void*)&cbdata);
        } else {
            rc = PMIx_Disconnect_nb(procs, nprocs, NULL, 0, cd_cb, (void*)&cbdata);
        }
        if (PMIX_SUCCESS == rc) {
            PMIX_WAIT_FOR_COMPLETION(cbdata.in_progress);
            rc = cbdata.status;
        }
    }
    /* the host server callback currently returns PMIX_EXISTS status for checking purposes */
    if (PMIX_EXISTS == rc) {
        rc = PMIX_SUCCESS;
    }
    return rc;
}
Ejemplo n.º 3
0
PMIX_EXPORT pmix_status_t PMIx_Publish(const pmix_info_t info[],
                                       size_t ninfo)
{
    pmix_status_t rc;
    pmix_cb_t *cb;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: publish called");

    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }

    /* if we aren't connected, don't attempt to send */
    if (!pmix_globals.connected) {
        return PMIX_ERR_UNREACH;
    }

    /* create a callback object to let us know when it is done */
    cb = PMIX_NEW(pmix_cb_t);
    cb->active = true;

    if (PMIX_SUCCESS != (rc = PMIx_Publish_nb(info, ninfo, op_cbfunc, cb))) {
        PMIX_ERROR_LOG(rc);
        PMIX_RELEASE(cb);
        return rc;
    }

    /* wait for the server to ack our request */
    PMIX_WAIT_FOR_COMPLETION(cb->active);
    rc = (pmix_status_t)cb->status;
    PMIX_RELEASE(cb);

    return rc;
}
Ejemplo n.º 4
0
static int test_spawn_common(char *my_nspace, int my_rank, int blocking)
{
    int rc;
    pmix_app_t *apps;
    size_t napps;
    char nspace[PMIX_MAX_NSLEN+1];
    memset(nspace, 0, PMIX_MAX_NSLEN+1);
    napps = 1;
    PMIX_APP_CREATE(apps, napps);
    if (blocking) {
        if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, apps, napps, nspace))) {
            PMIX_APP_FREE(apps, napps);
            return rc;
        }
    } else {
        spawn_cbdata cbdata;
        cbdata.in_progress = 1;
        memset(cbdata.nspace, 0, PMIX_MAX_NSLEN);
        rc = PMIx_Spawn_nb(NULL, 0, apps, napps, spawn_cb, (void*)&cbdata);
        if (PMIX_SUCCESS != rc) {
            PMIX_APP_FREE(apps, napps);
            return rc;
        }
        PMIX_WAIT_FOR_COMPLETION(cbdata.in_progress);
        strncpy(nspace, cbdata.nspace, strlen(cbdata.nspace)+1);
    }
    PMIX_APP_FREE(apps, napps);
    if (strncmp(nspace, "foobar", strlen(nspace)+1)) {
        return PMIX_ERROR;
    }
    return rc;
}
Ejemplo n.º 5
0
PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys,
                               const pmix_info_t info[], size_t ninfo)
{
    pmix_status_t rc;
    pmix_cb_t *cb;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: unpublish called");

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = PMIX_NEW(pmix_cb_t);
    cb->active = true;

    /* push the message into our event base to send to the server */
    if (PMIX_SUCCESS != (rc = PMIx_Unpublish_nb(keys, info, ninfo, op_cbfunc, cb))) {
        PMIX_RELEASE(cb);
        return rc;
    }

    /* wait for the server to ack our request */
    PMIX_WAIT_FOR_COMPLETION(cb->active);
    rc = cb->status;
    PMIX_RELEASE(cb);

    return rc;
}
Ejemplo n.º 6
0
int PMIx_Disconnect(const pmix_proc_t procs[], size_t nprocs)
{
    int rc;
    pmix_cb_t *cb;
     
    if (pmix_client_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }

    /* if we aren't connected, don't attempt to send */
    if (!pmix_globals.connected) {
        return PMIX_ERR_UNREACH;
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = PMIX_NEW(pmix_cb_t);
    cb->active = true;

    if (PMIX_SUCCESS != (rc = PMIx_Disconnect_nb(procs, nprocs, op_cbfunc, cb))) {
        PMIX_RELEASE(cb);
        return rc;
    }

    /* wait for the connect to complete */
    PMIX_WAIT_FOR_COMPLETION(cb->active);
    rc = cb->status;
    PMIX_RELEASE(cb);

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: disconnect completed");

    return rc;
}
Ejemplo n.º 7
0
int PMIx_Lookup(pmix_data_range_t scope,
                const pmix_info_t info[], size_t ninfo,
                pmix_pdata_t pdata[], size_t ndata)
{
    int rc;
    pmix_cb_t *cb;
    char **keys = NULL;
    size_t i;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: lookup called");

    /* bozo protection */
    if (NULL == pdata) {
        return PMIX_ERR_BAD_PARAM;
    }

    /* transfer the pdata keys to the keys argv array */
    for (i=0; i < ndata; i++) {
        if ('\0' != pdata[i].key[0]) {
            pmix_argv_append_nosize(&keys, pdata[i].key);
        }
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = PMIX_NEW(pmix_cb_t);
    cb->cbdata = (void*)pdata;
    cb->nvals = ndata;
    cb->active = true;

    if (PMIX_SUCCESS != (rc = PMIx_Lookup_nb(scope, keys,
                                             info, ninfo,
                                             lookup_cbfunc, cb))) {
        PMIX_RELEASE(cb);
        pmix_argv_free(keys);
        return rc;
    }

    /* wait for the server to ack our request */
    PMIX_WAIT_FOR_COMPLETION(cb->active);

    /* the data has been stored in the info array by lookup_cbfunc, so
     * nothing more for us to do */
    rc = cb->status;
    PMIX_RELEASE(cb);
    return rc;
}
Ejemplo n.º 8
0
static void set_namespace(int nprocs, char *ranks, char *name)
{
    size_t ninfo;
    pmix_info_t *info;
    ninfo = 8;
    char *regex, *ppn;

    PMIX_INFO_CREATE(info, ninfo);
    (void)strncpy(info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN);
    info[0].value.type = PMIX_UINT32;
    info[0].value.data.uint32 = nprocs;

    (void)strncpy(info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN);
    info[1].value.type = PMIX_UINT32;
    info[1].value.data.uint32 = 0;

    (void)strncpy(info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN);
    info[2].value.type = PMIX_UINT32;
    info[2].value.data.uint32 = nprocs;

    (void)strncpy(info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN);
    info[3].value.type = PMIX_STRING;
    info[3].value.data.string = strdup(ranks);

    PMIx_generate_regex(NODE_NAME, &regex);
    (void)strncpy(info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN);
    info[4].value.type = PMIX_STRING;
    info[4].value.data.string = regex;

    PMIx_generate_ppn(ranks, &ppn);
    (void)strncpy(info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN);
    info[5].value.type = PMIX_STRING;
    info[5].value.data.string = ppn;

    (void)strncpy(info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN);
    info[6].value.type = PMIX_UINT32;
    info[6].value.data.uint32 = nprocs;

    (void)strncpy(info[7].key, PMIX_APPNUM, PMIX_MAX_KEYLEN);
    info[7].value.type = PMIX_UINT32;
    info[7].value.data.uint32 = getpid ();

    int in_progress = 1, rc;
    if (PMIX_SUCCESS == (rc = PMIx_server_register_nspace(name, nprocs, info, ninfo, release_cb, &in_progress))) {
        PMIX_WAIT_FOR_COMPLETION(in_progress);
    }
    PMIX_INFO_FREE(info, ninfo);
}
Ejemplo n.º 9
0
PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo,
                           const pmix_app_t apps[], size_t napps,
                           char nspace[])
{
    pmix_status_t rc;
    pmix_cb_t *cb;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: spawn called");

    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }

    /* if we aren't connected, don't attempt to send */
    if (!pmix_globals.connected) {
        return PMIX_ERR_UNREACH;
    }

    /* ensure the nspace (if provided) is initialized */
    if (NULL != nspace) {
        memset(nspace, 0, PMIX_MAX_NSLEN+1);
    }

    /* create a callback object */
    cb = PMIX_NEW(pmix_cb_t);
    cb->active = true;

    if (PMIX_SUCCESS != (rc = PMIx_Spawn_nb(job_info, ninfo, apps, napps, spawn_cbfunc, cb))) {
        PMIX_RELEASE(cb);
        return rc;
    }

    /* wait for the result */
    PMIX_WAIT_FOR_COMPLETION(cb->active);
    rc = cb->status;
    if (NULL != nspace) {
        (void)strncpy(nspace, cb->nspace, PMIX_MAX_NSLEN);
    }
    PMIX_RELEASE(cb);

    return rc;
}
Ejemplo n.º 10
0
PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs,
                           const pmix_info_t info[], size_t ninfo)
{
    pmix_cb_t *cb;
    pmix_status_t rc;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: executing fence");

    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }

    /* if we aren't connected, don't attempt to send */
    if (!pmix_globals.connected) {
        return PMIX_ERR_UNREACH;
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = PMIX_NEW(pmix_cb_t);
    cb->active = true;

    /* push the message into our event base to send to the server */
    if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(procs, nprocs, info, ninfo,
                                            op_cbfunc, cb))) {
        PMIX_RELEASE(cb);
        return rc;
    }

    /* wait for the fence to complete */
    PMIX_WAIT_FOR_COMPLETION(cb->active);
    rc = cb->status;
    PMIX_RELEASE(cb);

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: fence released");

    return rc;
}
Ejemplo n.º 11
0
static void myerr(pmix_status_t status,
                  pmix_proc_t procs[], size_t nprocs,
                  pmix_info_t info[], size_t ninfo)
{
    int rc;
    opal_list_t plist, ilist;
    opal_namelist_t *nm;
    opal_value_t *iptr;
    volatile int cond = 1;
    size_t n;

    /* convert the incoming status */
    rc = pmix1_convert_rc(status);

    /* convert the array of procs */
    OBJ_CONSTRUCT(&plist, opal_list_t);
    for (n=0; n < nprocs; n++) {
        nm = OBJ_NEW(opal_namelist_t);
        nm->name.jobid = strtoul(procs[n].nspace, NULL, 10);
        nm->name.vpid = procs[n].rank;
        opal_list_append(&plist, &nm->super);
    }

    /* convert the array of info */
    OBJ_CONSTRUCT(&ilist, opal_list_t);
    for (n=0; n < ninfo; n++) {
        iptr = OBJ_NEW(opal_value_t);
        iptr->key = strdup(info[n].key);
        pmix1_value_unload(iptr, &info[n].value);
        opal_list_append(&plist, &iptr->super);
    }

    /* call the base errhandler */
    opal_pmix_base_errhandler(rc, &plist, &ilist, completion_handler, (void *)&cond);
    PMIX_WAIT_FOR_COMPLETION(cond);

    OPAL_LIST_DESTRUCT(&plist);
    OPAL_LIST_DESTRUCT(&ilist);
}
Ejemplo n.º 12
0
static int native_fence(opal_process_name_t *procs, size_t nprocs)
{
    opal_buffer_t *msg, *bptr;
    pmix_cmd_t cmd = PMIX_FENCE_CMD;
    pmix_cb_t *cb;
    int rc, ret;
    opal_pmix_scope_t scope;
    int32_t cnt;
    opal_value_t *kp;
    opal_process_name_t id;
    size_t i;
    uint64_t np;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native executing fence on %u procs",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (unsigned int)nprocs);

    if (NULL == mca_pmix_native_component.uri) {
        /* no server available, so just return */
        return OPAL_SUCCESS;
    }

    msg = OBJ_NEW(opal_buffer_t);
    /* pack the fence cmd */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return rc;
    }
    /* pack the number of procs */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &nprocs, 1, OPAL_SIZE))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return rc;
    }
    if (0 < nprocs) {
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, procs, nprocs, OPAL_NAME))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
    }

    /* if we haven't already done it, ensure we have committed our values */
    if (NULL != mca_pmix_native_component.cache_local) {
        scope = PMIX_LOCAL;
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_local, 1, OPAL_BUFFER))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        OBJ_RELEASE(mca_pmix_native_component.cache_local);
    }
    if (NULL != mca_pmix_native_component.cache_remote) {
        scope = PMIX_REMOTE;
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_remote, 1, OPAL_BUFFER))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        OBJ_RELEASE(mca_pmix_native_component.cache_remote);
    }
    if (NULL != mca_pmix_native_component.cache_global) {
        scope = PMIX_GLOBAL;
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &scope, 1, PMIX_SCOPE_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &mca_pmix_native_component.cache_global, 1, OPAL_BUFFER))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }
        OBJ_RELEASE(mca_pmix_native_component.cache_global);
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = OBJ_NEW(pmix_cb_t);
    cb->active = true;

    /* push the message into our event base to send to the server */
    PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb);

    /* wait for the fence to complete */
    PMIX_WAIT_FOR_COMPLETION(cb->active);

    /* get the number of contributors */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &np, &cnt, OPAL_UINT64))) {
        OPAL_ERROR_LOG(rc);
        return rc;
    }

    /* if data was returned, unpack and store it */
    for (i=0; i < np; i++) {
        /* get the buffer that contains the data for the next proc */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &msg, &cnt, OPAL_BUFFER))) {
            if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) {
                break;
            }
            OPAL_ERROR_LOG(rc);
            return rc;
        }
        /* extract the id of the contributor from the blob */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(msg, &id, &cnt, OPAL_NAME))) {
            OPAL_ERROR_LOG(rc);
            return rc;
        }
        /* extract all blobs from this proc, starting with the scope */
        cnt = 1;
        while (OPAL_SUCCESS == (rc = opal_dss.unpack(msg, &scope, &cnt, PMIX_SCOPE_T))) {
            /* extract the blob for this scope */
            cnt = 1;
            if (OPAL_SUCCESS != (rc = opal_dss.unpack(msg, &bptr, &cnt, OPAL_BUFFER))) {
                OPAL_ERROR_LOG(rc);
                return rc;
            }
            /* now unpack and store the values - everything goes into our internal store */
            cnt = 1;
            while (OPAL_SUCCESS == (rc = opal_dss.unpack(bptr, &kp, &cnt, OPAL_VALUE))) {
                if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, &id, kp))) {
                    OPAL_ERROR_LOG(ret);
                }
                OBJ_RELEASE(kp);
                cnt = 1;
            }
            OBJ_RELEASE(bptr);
            cnt = 1;
        }
        if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
            OPAL_ERROR_LOG(rc);
        }
        OBJ_RELEASE(msg);
        if (OPAL_SUCCESS != rc && OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
            OPAL_ERROR_LOG(rc);
        } else {
            rc = OPAL_SUCCESS;
        }
    }

    OBJ_RELEASE(cb);

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native fence released",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    return OPAL_SUCCESS;
}
Ejemplo n.º 13
0
int main(int argc, char **argv)
{
    int rc;
    pmix_value_t value;
    pmix_value_t *val = &value;
    char *tmp;
    pmix_proc_t proc;
    uint32_t n, num_gets;
    bool active;

    /* init us */
    if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc);
        exit(0);
    }
    pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank);

    /* get our universe size */
    (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;
    if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }
    nprocs = val->data.uint32;
    PMIX_VALUE_RELEASE(val);
    pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs);

    /* put a few values */
    (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank);
    value.type = PMIX_UINT32;
    value.data.uint32 = 1234;
    if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Store_internal failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank);
    value.type = PMIX_UINT64;
    value.data.uint64 = 1234;
    if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank);
    value.type = PMIX_STRING;
    value.data.string = "1234";
    if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    /* introduce a delay by one rank so we can check what happens
     * if a "get" is received prior to data being provided */
    if (0 == myproc.rank) {
        sleep(2);
    }

    /* commit the data to the server */
    if (PMIX_SUCCESS != (rc = PMIx_Commit())) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Commit failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    /* call fence_nb, but don't return any data */
    PMIX_PROC_CONSTRUCT(&proc);
    (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;
    active = true;
    if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(&proc, 1, NULL, 0, opcbfunc, &active))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    /* get the committed data - ask for someone who doesn't exist as well */
    num_gets = 0;
    for (n=0; n < nprocs; n++) {
        (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, n);
        proc.rank = n;
        if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp,
                                              NULL, 0, valcbfunc, tmp))) {
            pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc);
            goto done;
        }
        ++num_gets;
        (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, n);
        if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp,
                                              NULL, 0, valcbfunc, tmp))) {
            pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc);
            goto done;
        }
        ++num_gets;
    }

    /* wait for the first fence to finish */
    PMIX_WAIT_FOR_COMPLETION(active);

    /* wait for all my "get" calls to complete */
    while (getcount < num_gets) {
        struct timespec ts;
        ts.tv_sec = 0;
        ts.tv_nsec = 100000;
        nanosleep(&ts, NULL);
    }

    /* call fence again so everyone waits before leaving */
    proc.rank = PMIX_RANK_WILDCARD;
    if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

 done:
    /* finalize us */
    pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank);
    if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
    } else {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
    }
    fflush(stderr);
    return(0);
}
Ejemplo n.º 14
0
static bool native_get_attr(const char *attr, opal_value_t **kv)
{
    opal_buffer_t *msg, *bptr;
    opal_list_t vals;
    opal_value_t *kp, *lclpeers=NULL, kvn;
    pmix_cmd_t cmd = PMIX_GETATTR_CMD;
    char **ranks;
    int rc, ret;
    int32_t cnt;
    bool found=false;
    opal_hwloc_locality_t locality;
    pmix_cb_t *cb;
    uint32_t i, myrank;
    opal_process_name_t id;
    char *cpuset;
    opal_buffer_t buf, buf2;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native get_attr called",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* try to retrieve the requested value from the dstore */
    OBJ_CONSTRUCT(&vals, opal_list_t);
    if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME, attr, &vals)) {
        *kv = (opal_value_t*)opal_list_remove_first(&vals);
        OPAL_LIST_DESTRUCT(&vals);
        return true;
    }

    if (NULL == mca_pmix_native_component.uri) {
        /* no server available, so just return */
        return false;
    }

    /* if the value isn't yet available, then we should try to retrieve
     * all the available attributes and store them for future use */
    msg = OBJ_NEW(opal_buffer_t);
    /* pack the cmd */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return false;
    }

    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = OBJ_NEW(pmix_cb_t);
    cb->active = true;

    /* push the message into our event base to send to the server */
    PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb);

    /* wait for the data to return */
    PMIX_WAIT_FOR_COMPLETION(cb->active);

    /* we have received the entire data blob for this process - unpack
     * and cache all values, keeping the one we requested to return
     * to the caller */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &ret, &cnt, OPAL_INT))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(cb);
        return false;
    }
    if (OPAL_SUCCESS == ret) {
        /* unpack the buffer containing the values */
        cnt = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &bptr, &cnt, OPAL_BUFFER))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(cb);
            return false;
        }
        cnt = 1;
        while (OPAL_SUCCESS == (rc = opal_dss.unpack(bptr, &kp, &cnt, OPAL_VALUE))) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s unpacked attr %s",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key);
            /* if this is the local topology, we need to save it in a special way */
#if OPAL_HAVE_HWLOC
            {
                hwloc_topology_t topo;
                if (0 == strcmp(PMIX_LOCAL_TOPO, kp->key)) {
                    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                        "%s saving topology",
                                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
                    /* transfer the byte object for unpacking */
                    OBJ_CONSTRUCT(&buf, opal_buffer_t);
                    opal_dss.load(&buf, kp->data.bo.bytes, kp->data.bo.size);
                    kp->data.bo.bytes = NULL;  // protect the data region
                    kp->data.bo.size = 0;
                    OBJ_RELEASE(kp);
                    /* extract the topology */
                    cnt=1;
                    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &topo, &cnt, OPAL_HWLOC_TOPO))) {
                        OPAL_ERROR_LOG(rc);
                        OBJ_DESTRUCT(&buf);
                        continue;
                    }
                    OBJ_DESTRUCT(&buf);
                    if (NULL == opal_hwloc_topology) {
                        opal_hwloc_topology = topo;
                    } else {
                        hwloc_topology_destroy(topo);
                    }
                    cnt = 1;
                    continue;
                }
            }
#endif
            /* if this is the local cpuset blob, then unpack and store its contents */
            if (0 == strcmp(PMIX_LOCAL_CPUSETS, kp->key)) {
                opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                    "%s received local cpusets",
                                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
                /* transfer the byte object for unpacking */
                OBJ_CONSTRUCT(&buf, opal_buffer_t);
                opal_dss.load(&buf, kp->data.bo.bytes, kp->data.bo.size);
                kp->data.bo.bytes = NULL;  // protect the data region
                kp->data.bo.size = 0;
                OBJ_RELEASE(kp);
                cnt=1;
                while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf, &id, &cnt, OPAL_NAME))) {
                    cnt=1;
                    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &cpuset, &cnt, OPAL_STRING))) {
                        OPAL_ERROR_LOG(rc);
                        OBJ_DESTRUCT(&buf);
                        cnt = 1;
                        continue;
                    }
                    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                        "%s saving cpuset %s for local peer %s",
                                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                        (NULL == cpuset) ? "NULL" : cpuset,
                                        OPAL_NAME_PRINT(id));
                    OBJ_CONSTRUCT(&kvn, opal_value_t);
                    kvn.key = strdup(OPAL_DSTORE_CPUSET);
                    kvn.type = OPAL_STRING;
                    kvn.data.string = cpuset;
                    if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, &id, &kvn))) {
                        OPAL_ERROR_LOG(rc);
                        OBJ_DESTRUCT(&kvn);
                        cnt = 1;
                        continue;
                    }
                    OBJ_DESTRUCT(&kvn);
                }
                OBJ_DESTRUCT(&buf);
                if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
                    OPAL_ERROR_LOG(rc);
                    return false;
                }
                cnt=1;
                continue;
            } else if (0 == strcmp(PMIX_PROC_MAP, kp->key)) {
                opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                    "%s received proc map",
                                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
                /* transfer the byte object for unpacking */
                OBJ_CONSTRUCT(&buf, opal_buffer_t);
                opal_dss.load(&buf, kp->data.bo.bytes, kp->data.bo.size);
                kp->data.bo.bytes = NULL;  // protect the data region
                kp->data.bo.size = 0;
                OBJ_RELEASE(kp);
                /* get the jobid */
                cnt=1;
                if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &kp, &cnt, OPAL_VALUE))) {
                    OPAL_ERROR_LOG(rc);
                    OBJ_DESTRUCT(&buf);
                    cnt = 1;
                    return false;
                }
                if (0 != strcmp(PMIX_JOBID, kp->key)) {
                    OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
                    OBJ_DESTRUCT(&buf);
                    OBJ_RELEASE(kp);
                    cnt = 1;
                    return false;
                }
                id.jobid = kp->data.uint32;
                OBJ_RELEASE(kp);
                /* unpack the data for each rank */
                cnt=1;
                while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf, &kp, &cnt, OPAL_VALUE))) {
                    if (0 != strcmp(PMIX_RANK, kp->key)) {
                        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
                        OBJ_DESTRUCT(&buf);
                        OBJ_RELEASE(kp);
                        cnt = 1;
                        return false;
                    }
                    id.vpid = kp->data.uint32;
                    /* unpack the blob for this rank */
                    cnt=1;
                    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &kp, &cnt, OPAL_VALUE))) {
                        OPAL_ERROR_LOG(rc);
                        OBJ_DESTRUCT(&buf);
                        cnt = 1;
                        return false;
                    }
                    if (0 != strcmp(PMIX_PROC_MAP, kp->key)) {
                        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
                        OBJ_DESTRUCT(&buf);
                        OBJ_RELEASE(kp);
                        cnt = 1;
                        return false;
                    }
                    /* transfer the byte object for unpacking */
                    OBJ_CONSTRUCT(&buf2, opal_buffer_t);
                    opal_dss.load(&buf2, kp->data.bo.bytes, kp->data.bo.size);
                    kp->data.bo.bytes = NULL;  // protect the data region
                    kp->data.bo.size = 0;
                    OBJ_RELEASE(kp);
                    /* unpack and store the map */
                    cnt=1;
                    while (OPAL_SUCCESS == (rc = opal_dss.unpack(&buf2, &kp, &cnt, OPAL_VALUE))) {
                        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                            "%s storing key %s for peer %s",
                                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                            kp->key, OPAL_NAME_PRINT(id));
                        if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, &id, kp))) {
                            OPAL_ERROR_LOG(rc);
                            OBJ_RELEASE(kp);
                            OBJ_DESTRUCT(&buf2);
                            return false;
                        }
                    }
                    OBJ_DESTRUCT(&buf2);
                    if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
                        OPAL_ERROR_LOG(rc);
                        return false;
                    }
                    cnt=1;
                }
                OBJ_DESTRUCT(&buf);
                if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
                    OPAL_ERROR_LOG(rc);
                    return false;
                }
                cnt=1;
                continue;
            }
            /* otherwise, it is a single piece of info, so store it */
            if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, &OPAL_PROC_MY_NAME, kp))) {
                OPAL_ERROR_LOG(rc);
                OBJ_RELEASE(kp);
                cnt = 1;
                continue;
            }
            /* save the list of local peers */
            if (0 == strcmp(PMIX_LOCAL_PEERS, kp->key)) {
                OBJ_RETAIN(kp);
                lclpeers = kp;
                opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                    "%s saving local peers %s",
                                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), lclpeers->data.string);
            } else if (0 == strcmp(PMIX_JOBID, kp->key)) {
                native_pname.jobid = kp->data.uint32;
            } else if (0 == strcmp(PMIX_RANK, kp->key)) {
                native_pname.vpid = kp->data.uint32;
            }
            if (0 == strcmp(attr, kp->key)) {
                OBJ_RETAIN(kp);
                *kv = kp;
                found = true;
            }
            OBJ_RELEASE(kp);
            cnt = 1;
        }
        OBJ_RELEASE(bptr);
        if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
            OPAL_ERROR_LOG(rc);
            return false;
        }
    } else {
        OPAL_ERROR_LOG(ret);
        OBJ_RELEASE(cb);
        return false;
    }
    OBJ_RELEASE(cb);
    opal_proc_set_name(&native_pname);

    /* if the list of local peers wasn't included, then we are done */
    if (NULL == lclpeers) {
        opal_output_verbose(0, opal_pmix_base_framework.framework_output,
                            "%s no local peers reported",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
        return found;
    }

    /* baseline all the procs as nonlocal */
    myrank = native_pname.vpid;
    id.jobid = native_pname.jobid;

#if OPAL_HAVE_HWLOC
    /* fetch my cpuset */
    OBJ_CONSTRUCT(&vals, opal_list_t);
    if (OPAL_SUCCESS == (rc = opal_dstore.fetch(opal_dstore_internal, &native_pname,
                                                OPAL_DSTORE_CPUSET, &vals))) {
        kp = (opal_value_t*)opal_list_get_first(&vals);
        cpuset = strdup(kp->data.string);
    } else {
        cpuset = NULL;
    }
    OPAL_LIST_DESTRUCT(&vals);
#endif

    /* we only need to set locality for each local rank as "not found"
     * equates to "non local" */
    ranks = opal_argv_split(lclpeers->data.string, ',');
    for (i=0; NULL != ranks[i]; i++) {
        uint32_t vid = strtoul(ranks[i], NULL, 10);
        if (myrank == vid) {
            continue;
        }
        id.vpid = vid;
#if OPAL_HAVE_HWLOC
        OBJ_CONSTRUCT(&vals, opal_list_t);
        if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, &id,
                                                    OPAL_DSTORE_CPUSET, &vals))) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s cpuset for local proc %s not found",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                OPAL_NAME_PRINT(id));
            OPAL_LIST_DESTRUCT(&vals);
            /* even though the cpuset wasn't found, we at least know it is
             * on the same node with us */
            locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
        } else {
            kp = (opal_value_t*)opal_list_get_first(&vals);
            if (NULL == kp->data.string) {
                /* if we share a node, but we don't know anything more, then
                 * mark us as on the node as this is all we know
                 */
                locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
            } else {
                /* determine relative location on our node */
                locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
                                                                 cpuset,
                                                                 kp->data.string);
            }
            OPAL_LIST_DESTRUCT(&vals);
        }
#else
        /* all we know is we share a node */
        locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
#endif
        OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
                             "%s pmix:native proc %s locality %s",
                             OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                             OPAL_NAME_PRINT(id),
                             opal_hwloc_base_print_locality(locality)));

        OBJ_CONSTRUCT(&kvn, opal_value_t);
        kvn.key = strdup(OPAL_DSTORE_LOCALITY);
        kvn.type = OPAL_UINT16;
        kvn.data.uint16 = locality;
        (void)opal_dstore.store(opal_dstore_internal, &id, &kvn);
        OBJ_DESTRUCT(&kvn);
    }
#if OPAL_HAVE_HWLOC
    if (NULL != cpuset) {
        free(cpuset);
    }
#endif
    opal_argv_free(ranks);

    return found;
}
Ejemplo n.º 15
0
static int native_abort(int flag, const char msg[])
{
    opal_buffer_t *bfr;
    pmix_cmd_t cmd = PMIX_ABORT_CMD;
    int rc;
    pmix_cb_t *cb;
    opal_event_t ev;
    struct timeval tv = {1, 0};

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native abort called",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    if (NULL == mca_pmix_native_component.uri) {
        /* no server available, so just return */
        return OPAL_SUCCESS;
    }

    if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) {
        /* create a buffer to hold the message */
        bfr = OBJ_NEW(opal_buffer_t);
        /* pack the cmd */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &cmd, 1, PMIX_CMD_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }
        /* pack the status flag */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &flag, 1, OPAL_INT))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }
        /* pack the string message - a NULL is okay */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(bfr, &msg, 1, OPAL_STRING))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(bfr);
            return rc;
        }

        /* create a callback object as we need to pass it to the
         * recv routine so we know which callback to use when
         * the return message is recvd */
        cb = OBJ_NEW(pmix_cb_t);
        cb->active = true;

        /* push a timeout event to wake us up just in case this
         * message cannot get thru - e.g., someone else may have
         * detected the failure of the server and ordered an abort */
        opal_event_evtimer_set(mca_pmix_native_component.evbase,
                               &ev, timeout, cb);
        opal_event_evtimer_add(&ev, &tv);

        /* push the message into our event base to send to the server */
        PMIX_ACTIVATE_SEND_RECV(bfr, wait_cbfunc, cb);

        /* wait for the release */
        PMIX_WAIT_FOR_COMPLETION(cb->active);
        OBJ_RELEASE(cb);
    }
    return OPAL_SUCCESS;
}
Ejemplo n.º 16
0
static int native_fini(void)
{
    opal_buffer_t *msg;
    pmix_cb_t *cb;
    pmix_cmd_t cmd = PMIX_FINALIZE_CMD;
    int rc;

    if (1 != init_cntr) {
        --init_cntr;
        return OPAL_SUCCESS;
    }
    init_cntr = 0;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native finalize called",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    if (NULL == mca_pmix_native_component.uri) {
        /* nothing was setup, so return */
        return OPAL_SUCCESS;
    }

    if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) {
        /* setup a cmd message to notify the PMIx
         * server that we are normally terminating */
        msg = OBJ_NEW(opal_buffer_t);
        /* pack the cmd */
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) {
            OPAL_ERROR_LOG(rc);
            OBJ_RELEASE(msg);
            return rc;
        }

        /* create a callback object as we need to pass it to the
         * recv routine so we know which callback to use when
         * the return message is recvd */
        cb = OBJ_NEW(pmix_cb_t);
        cb->active = true;

        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s pmix:native sending finalize sync to server",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

        /* push the message into our event base to send to the server */
        PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb);

        /* wait for the ack to return */
        PMIX_WAIT_FOR_COMPLETION(cb->active);
        OBJ_RELEASE(cb);
    }

    if (NULL != mca_pmix_native_component.evbase) {
        opal_stop_progress_thread("opal_async", true);
        mca_pmix_native_component.evbase = NULL;
    }

    if (0 <= mca_pmix_native_component.sd) {
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
    }

    return OPAL_SUCCESS;
}
Ejemplo n.º 17
0
int main(int argc, char **argv)
{
    char **client_env=NULL;
    char **client_argv=NULL;
    int rc;
    struct stat stat_buf;
    struct timeval tv;
    double test_start;
    cli_state_t order[CLI_TERM+1];
    test_params params;
    INIT_TEST_PARAMS(params);
    int test_fail = 0;
    char *tmp;
    int ns_nprocs;

    gettimeofday(&tv, NULL);
    test_start = tv.tv_sec + 1E-6*tv.tv_usec;

    /* smoke test */
    if (PMIX_SUCCESS != 0) {
        TEST_ERROR(("ERROR IN COMPUTING CONSTANTS: PMIX_SUCCESS = %d", PMIX_SUCCESS));
        exit(1);
    }

    TEST_VERBOSE(("Testing version %s", PMIx_Get_version()));

    parse_cmd(argc, argv, &params);
    TEST_VERBOSE(("Start PMIx_lite smoke test (timeout is %d)", params.timeout));

    /* verify executable */
    if( 0 > ( rc = stat(params.binary, &stat_buf) ) ){
        TEST_ERROR(("Cannot stat() executable \"%s\": %d: %s", params.binary, errno, strerror(errno)));
        FREE_TEST_PARAMS(params);
        return 0;
    } else if( !S_ISREG(stat_buf.st_mode) ){
        TEST_ERROR(("Client executable \"%s\": is not a regular file", params.binary));
        FREE_TEST_PARAMS(params);
        return 0;
    }else if( !(stat_buf.st_mode & S_IXUSR) ){
        TEST_ERROR(("Client executable \"%s\": has no executable flag", params.binary));
        FREE_TEST_PARAMS(params);
        return 0;
    }

    /* setup the server library */
    pmix_info_t info[1];
    (void)strncpy(info[0].key, PMIX_SOCKET_MODE, PMIX_MAX_KEYLEN);
    info[0].value.type = PMIX_UINT32;
    info[0].value.data.uint32 = 0666;

    if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, 1))) {
        TEST_ERROR(("Init failed with error %d", rc));
        FREE_TEST_PARAMS(params);
        return rc;
    }
    /* register the errhandler */
    PMIx_Register_event_handler(NULL, 0, NULL, 0,
                                errhandler, errhandler_reg_callbk, NULL);

    order[CLI_UNINIT] = CLI_FORKED;
    order[CLI_FORKED] = CLI_FIN;
    order[CLI_CONNECTED] = CLI_UNDEF;
    order[CLI_FIN] = CLI_TERM;
    order[CLI_DISCONN] = CLI_UNDEF;
    order[CLI_TERM] = CLI_UNDEF;
    cli_init(params.nprocs, order);

    /* set common argv and env */
    client_env = pmix_argv_copy(environ);
    set_client_argv(&params, &client_argv);

    tmp = pmix_argv_join(client_argv, ' ');
    TEST_VERBOSE(("Executing test: %s", tmp));
    free(tmp);

    int launched = 0;
    /* set namespaces and fork clients */
    if (NULL == params.ns_dist) {
        /* we have a single namespace for all clients */
        ns_nprocs = params.nprocs;
        rc = launch_clients(ns_nprocs, params.binary, &client_env, &client_argv);
        if (PMIX_SUCCESS != rc) {
            FREE_TEST_PARAMS(params);
            return rc;
        }
        launched += ns_nprocs;
    } else {
        char *pch;
        pch = strtok(params.ns_dist, ":");
        while (NULL != pch) {
            ns_nprocs = (int)strtol(pch, NULL, 10);
            if (params.nprocs < (uint32_t)(launched+ns_nprocs)) {
                TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter."));
                FREE_TEST_PARAMS(params);
                return PMIX_ERROR;
            }
            if (0 < ns_nprocs) {
                rc = launch_clients(ns_nprocs, params.binary, &client_env, &client_argv);
                if (PMIX_SUCCESS != rc) {
                    FREE_TEST_PARAMS(params);
                    return rc;
                }
            }
            pch = strtok (NULL, ":");
            launched += ns_nprocs;
        }
    }
    if (params.nprocs != (uint32_t)launched) {
        TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter."));
        cli_kill_all();
        test_fail = 1;
    }

    /* hang around until the client(s) finalize */
    while (!test_terminated()) {
        // To avoid test hang we want to interrupt the loop each 0.1s
        double test_current;

        // check if we exceed the max time
        gettimeofday(&tv, NULL);
        test_current = tv.tv_sec + 1E-6*tv.tv_usec;
        if( (test_current - test_start) > params.timeout ){
            break;
        }
        cli_wait_all(0);
    }

    if( !test_terminated() ){
        TEST_ERROR(("Test exited by a timeout!"));
        cli_kill_all();
        test_fail = 1;
    }

    if( test_abort ){
        TEST_ERROR(("Test was aborted!"));
        /* do not simply kill the clients as that generates
         * event notifications which these tests then print
         * out, flooding the log */
      //  cli_kill_all();
        test_fail = 1;
    }

    if (0 != params.test_spawn) {
        PMIX_WAIT_FOR_COMPLETION(spawn_wait);
    }

    pmix_argv_free(client_argv);
    pmix_argv_free(client_env);

    /* deregister the errhandler */
    PMIx_Deregister_event_handler(0, op_callbk, NULL);

    cli_wait_all(1.0);

    /* finalize the server library */
    if (PMIX_SUCCESS != (rc = PMIx_server_finalize())) {
        TEST_ERROR(("Finalize failed with error %d", rc));
    }

    FREE_TEST_PARAMS(params);

    if (0 == test_fail) {
        TEST_OUTPUT(("Test finished OK!"));
    }

    return test_fail;
}
Ejemplo n.º 18
0
static int native_get(const opal_process_name_t *id,
                      const char *key,
                      opal_value_t **kv)
{
    opal_buffer_t *msg, *bptr;
    pmix_cmd_t cmd = PMIX_GET_CMD;
    pmix_cb_t *cb;
    int rc, ret;
    int32_t cnt;
    opal_list_t vals;
    opal_value_t *kp;
    bool found;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native getting value for proc %s key %s",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                        OPAL_NAME_PRINT(*id), key);

    /* first see if we already have the info in our dstore */
    OBJ_CONSTRUCT(&vals, opal_list_t);
    if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, id,
                                          key, &vals)) {
        *kv = (opal_value_t*)opal_list_remove_first(&vals);
        OPAL_LIST_DESTRUCT(&vals);
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s pmix:native value retrieved from dstore",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
        return OPAL_SUCCESS;
    }

    if (NULL == mca_pmix_native_component.uri) {
        /* no server available, so just return */
        return OPAL_ERR_NOT_FOUND;
    }

    /* nope - see if we can get it */
    msg = OBJ_NEW(opal_buffer_t);
    /* pack the get cmd */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return rc;
    }
    /* pack the request information - we'll get the entire blob
     * for this proc, so we don't need to pass the key */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, id, 1, OPAL_NAME))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(msg);
        return rc;
    }
    /* create a callback object as we need to pass it to the
     * recv routine so we know which callback to use when
     * the return message is recvd */
    cb = OBJ_NEW(pmix_cb_t);
    cb->active = true;

    /* push the message into our event base to send to the server */
    PMIX_ACTIVATE_SEND_RECV(msg, wait_cbfunc, cb);

    /* wait for the data to return */
    PMIX_WAIT_FOR_COMPLETION(cb->active);

    /* we have received the entire data blob for this process - unpack
     * and cache all values, keeping the one we requested to return
     * to the caller */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&cb->data, &ret, &cnt, OPAL_INT))) {
        OPAL_ERROR_LOG(rc);
        OBJ_RELEASE(cb);
        return rc;
    }
    found = false;
    cnt = 1;
    while (OPAL_SUCCESS == (rc = opal_dss.unpack(&cb->data, &bptr, &cnt, OPAL_BUFFER))) {
        while (OPAL_SUCCESS == (rc = opal_dss.unpack(bptr, &kp, &cnt, OPAL_VALUE))) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s pmix:native retrieved %s (%s) from server for proc %s",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key,
                                (OPAL_STRING == kp->type) ? kp->data.string : "NS",
                                OPAL_NAME_PRINT(*id));
            if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, id, kp))) {
                OPAL_ERROR_LOG(ret);
            }
            if (0 == strcmp(key, kp->key)) {
                *kv = kp;
                found = true;
            } else {
                OBJ_RELEASE(kp);
            }
        }
        if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
            OPAL_ERROR_LOG(rc);
        }
        OBJ_RELEASE(bptr);
        cnt = 1;
    }
    if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
        OPAL_ERROR_LOG(rc);
    } else {
        rc = OPAL_SUCCESS;
    }
    OBJ_RELEASE(cb);

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:native get completed",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
    if (found) {
        return OPAL_SUCCESS;
    }
    /* we didn't find the requested data - pass back a
     * status that indicates the source of the problem,
     * either during the data fetch, message unpacking,
     * or not found */
    *kv = NULL;
    if (OPAL_SUCCESS == rc) {
        if (OPAL_SUCCESS == ret) {
            rc = OPAL_ERR_NOT_FOUND;
        } else {
            rc = ret;
        }
    }
    return rc;
}
Ejemplo n.º 19
0
int main(int argc, char **argv)
{
    char **client_env=NULL;
    char **client_argv=NULL;
    int rc;
    struct stat stat_buf;
    struct timeval tv;
    double test_start;
    test_params params;
    INIT_TEST_PARAMS(params);
    int test_fail = 0;
    char *tmp;
    int ns_nprocs;

    gettimeofday(&tv, NULL);
    test_start = tv.tv_sec + 1E-6*tv.tv_usec;

    /* smoke test */
    if (PMIX_SUCCESS != 0) {
        TEST_ERROR(("ERROR IN COMPUTING CONSTANTS: PMIX_SUCCESS = %d", PMIX_SUCCESS));
        exit(1);
    }

    TEST_VERBOSE(("Testing version %s", PMIx_Get_version()));

    parse_cmd(argc, argv, &params);
    TEST_VERBOSE(("Start PMIx_lite smoke test (timeout is %d)", params.timeout));

    /* set common argv and env */
    client_env = pmix_argv_copy(environ);
    set_client_argv(&params, &client_argv);

    tmp = pmix_argv_join(client_argv, ' ');
    TEST_VERBOSE(("Executing test: %s", tmp));
    free(tmp);

    /* verify executable */
    if( 0 > ( rc = stat(params.binary, &stat_buf) ) ){
        TEST_ERROR(("Cannot stat() executable \"%s\": %d: %s", params.binary, errno, strerror(errno)));
        FREE_TEST_PARAMS(params);
        return 0;
    } else if( !S_ISREG(stat_buf.st_mode) ){
        TEST_ERROR(("Client executable \"%s\": is not a regular file", params.binary));
        FREE_TEST_PARAMS(params);
        return 0;
    }else if( !(stat_buf.st_mode & S_IXUSR) ){
        TEST_ERROR(("Client executable \"%s\": has no executable flag", params.binary));
        FREE_TEST_PARAMS(params);
        return 0;
    }

    if (PMIX_SUCCESS != (rc = server_init(&params))) {
        FREE_TEST_PARAMS(params);
        return rc;
    }

    cli_init(params.lsize);

    int launched = 0;
    /* set namespaces and fork clients */
    if (NULL == params.ns_dist) {
        uint32_t i;
        int base_rank = 0;

        /* compute my start counter */
        for(i = 0; i < (uint32_t)my_server_id; i++) {
            base_rank += (params.nprocs % params.nservers) > (uint32_t)i ?
                        params.nprocs / params.nservers + 1 :
                        params.nprocs / params.nservers;
        }
        /* we have a single namespace for all clients */
        ns_nprocs = params.nprocs;
        launched += server_launch_clients(params.lsize, params.nprocs, base_rank,
                                   &params, &client_env, &client_argv);
    } else {
        char *pch;
        pch = strtok(params.ns_dist, ":");
        while (NULL != pch) {
            ns_nprocs = (int)strtol(pch, NULL, 10);
            if (params.nprocs < (uint32_t)(launched+ns_nprocs)) {
                TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter."));
                FREE_TEST_PARAMS(params);
                return PMIX_ERROR;
            }
            if (0 < ns_nprocs) {
                launched += server_launch_clients(ns_nprocs, ns_nprocs, 0, &params,
                                           &client_env, &client_argv);
            }
            pch = strtok (NULL, ":");
        }
    }
    if (params.lsize != (uint32_t)launched) {
        TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter."));
        cli_kill_all();
        test_fail = 1;
    }

    /* hang around until the client(s) finalize */
    while (!test_terminated()) {
        // To avoid test hang we want to interrupt the loop each 0.1s
        double test_current;

        // check if we exceed the max time
        gettimeofday(&tv, NULL);
        test_current = tv.tv_sec + 1E-6*tv.tv_usec;
        if( (test_current - test_start) > params.timeout ){
            break;
        }
        cli_wait_all(0);
    }

    if( !test_terminated() ){
        TEST_ERROR(("Test exited by a timeout!"));
        cli_kill_all();
        test_fail = 1;
    }

    if( test_abort ){
        TEST_ERROR(("Test was aborted!"));
        /* do not simply kill the clients as that generates
         * event notifications which these tests then print
         * out, flooding the log */
      //  cli_kill_all();
        test_fail = 1;
    }

    if (0 != params.test_spawn) {
        PMIX_WAIT_FOR_COMPLETION(spawn_wait);
    }

    /* deregister the errhandler */
    PMIx_Deregister_event_handler(0, op_callbk, NULL);

    cli_wait_all(1.0);

    test_fail += server_finalize(&params);

    FREE_TEST_PARAMS(params);
    pmix_argv_free(client_argv);
    pmix_argv_free(client_env);

    return test_fail;
}