Example #1
0
int main(int argc, char **argv)
{
    pmix_status_t rc;
    pmix_value_t *val;
    pmix_proc_t proc;
    pmix_info_t *info;
    size_t ninfo;
    volatile int active;
    pmix_query_t *query;
    size_t nq, n;
    myquery_data_t myquery_data;

fprintf(stderr, "I AM HERE\n");
fflush(stderr);
    sleep(10);
    exit(0);

    /* init us - since we were launched by the RM, our connection info
     * will have been provided at startup. */
    if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) {
        fprintf(stderr, "Debugger daemon ns %s rank %d: PMIx_tool_init failed: %d\n", myproc.nspace, myproc.rank, rc);
        exit(0);
    }
    fprintf(stderr, "Debugger daemon ns %s rank %d: Running\n", myproc.nspace, myproc.rank);


    /* register our default event handler */
    active = -1;
    PMIx_Register_event_handler(NULL, 0, NULL, 0,
                                notification_fn, evhandler_reg_callbk, (void*)&active);
    while (-1 == active) {
        usleep(10);
    }
    if (0 != active) {
        exit(active);
    }

    /* get the nspace of the job we are to debug */
    (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;
    if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_DEBUG_JOB, NULL, 0, &val))) {
        fprintf(stderr, "[%s:%d] Failed to get job being debugged - error %d\n", myproc.nspace, myproc.rank, rc);
        goto done;
    }
    if (NULL == val) {
        fprintf(stderr, "Got NULL return\n");
        goto done;
    }
    fprintf(stderr, "[%s:%d] Debugging %s\n", myproc.nspace, myproc.rank, val->data.string);

    /* get our local proctable - for scalability reasons, we don't want to
     * have our "root" debugger process get the proctable for everybody and
     * send it out to us. So ask the local PMIx server for the pid's of
     * our local target processes */
    nq = 1;
    PMIX_QUERY_CREATE(query, nq);
    PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_LOCAL_PROC_TABLE);
    query[0].nqual = 1;
    PMIX_INFO_CREATE(query[0].qualifiers, 1);
    PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, val->data.string, PMIX_STRING);  // the nspace we are enquiring about
    /* setup the caddy to retrieve the data */
    myquery_data.info = NULL;
    myquery_data.ninfo = 0;
    myquery_data.active = true;
    /* execute the query */
    if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&myquery_data))) {
        fprintf(stderr, "PMIx_Query_info failed: %d\n", rc);
        goto done;
    }
    while (myquery_data.active) {
        usleep(10);
    }
    fprintf(stderr, "[%s:%d] Local proctable received\n", myproc.nspace, myproc.rank);


    /* now that we have the proctable for our local processes, we can do our
     * magic debugger stuff and attach to them. We then send a "release" event
     * to them - i.e., it's the equivalent to setting the MPIR breakpoint. We
     * do this with the event notification system */
    (void)strncpy(proc.nspace, val->data.string, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;
    /* we send the notification to just the local procs of the job being debugged */
    ninfo = 1;
    PMIX_INFO_CREATE(info, ninfo);
    PMIX_INFO_LOAD(&info[0], PMIX_EVENT_CUSTOM_RANGE, &proc, PMIX_PROC);  // deliver to the target nspace
    fprintf(stderr, "[%s:%u] Sending release\n", myproc.nspace, myproc.rank);
    PMIx_Notify_event(PMIX_ERR_DEBUGGER_RELEASE,
                      NULL, PMIX_RANGE_LOCAL,
                      info, ninfo, NULL, NULL);

    /* do some debugger magic */
    n = 0;
    fprintf(stderr, "[%s:%u] Hanging around awhile, doing debugger magic\n", myproc.nspace, myproc.rank);
    while (n < 5) {
        usleep(1000);
        ++n;
    }

  done:
    /* finalize us */
    fprintf(stderr, "Debugger daemon ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
    if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
        fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
    } else {
        fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
    }
    fflush(stderr);
    return(0);
}
Example #2
0
static int attach_to_running_job(char *nspace)
{
    pmix_status_t rc;
    pmix_proc_t myproc;
    pmix_query_t *query;
    size_t nq;
    myquery_data_t *q;

    /* query the active nspaces so we can verify that the
     * specified one exists */
    nq = 1;
    PMIX_QUERY_CREATE(query, nq);
    PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_NAMESPACES);

    q = (myquery_data_t*)malloc(sizeof(myquery_data_t));
    DEBUG_CONSTRUCT_LOCK(&q->lock);
    if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)q))) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc);
        return -1;
    }
    DEBUG_WAIT_THREAD(&q->lock);
    DEBUG_DESTRUCT_LOCK(&q->lock);

    if (NULL == q->info) {
        fprintf(stderr, "Query returned no info\n");
        return -1;
    }
    /* the query should have returned a comma-delimited list of nspaces */
    if (PMIX_STRING != q->info[0].value.type) {
        fprintf(stderr, "Query returned incorrect data type: %d\n", q->info[0].value.type);
        return -1;
    }
    if (NULL == q->info[0].value.data.string) {
        fprintf(stderr, "Query returned no active nspaces\n");
        return -1;
    }

    fprintf(stderr, "Query returned %s\n", q->info[0].value.data.string);
    return 0;

#if 0
    /* split the returned string and look for the given nspace */

    /* if not found, then we have an error */
    PMIX_INFO_FREE(info, ninfo);

    /* get the proctable for this nspace */
    ninfo = 1;
    PMIX_INFO_CREATE(info, ninfo);
    (void)strncpy(info[0].key, PMIX_QUERY_PROC_TABLE, PMIX_MAX_KEYLEN);
    (void)strncpy(info[0].qualifier, nspace, PMIX_MAX_KEYLEN);
    if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(info, ninfo, infocbfunc, (void*)&active))) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
        return -1;
    }
    /* wait to get a response */

    /* the query should have returned a data_array */
    if (PMIX_DATA_ARRAY != info[0].type) {
        fprintf(stderr, "Query returned incorrect data type: %d\n", info[0].type);
        return -1;
    }
    if (NULL == info[0].data.darray.array) {
        fprintf(stderr, "Query returned no proctable info\n");
        return -1;
    }
    /* the data array consists of a struct:
     *     size_t size;
     *     void* array;
     *
     * In this case, the array is composed of pmix_proc_info_t structs:
     *     pmix_proc_t proc;   // contains the nspace,rank of this proc
     *     char* hostname;
     *     char* executable_name;
     *     pid_t pid;
     *     int exit_code;
     *     pmix_proc_state_t state;
     */

    /* this is where a debugger tool would process the proctable to
     * create whatever blob it needs to provide to its daemons */
    PMIX_INFO_FREE(info, ninfo);

    /* setup the debugger daemon spawn request */
    napps = 1;
    PMIX_APP_CREATE(app, napps);
    /* setup the name of the daemon executable to launch */
    app[0].cmd = strdup("debuggerdaemon");
    app[0].argc = 1;
    app[0].argv = (char**)malloc(2*sizeof(char*));
    app[0].argv[0] = strdup("debuggerdaemon");
    app[0].argv[1] = NULL;
    /* provide directives so the daemons go where we want, and
     * let the RM know these are debugger daemons */
    ninfo = 3;
    PMIX_INFO_CREATE(app[0].info, ninfo);
    PMIX_INFO_LOAD(&app[0].info[0], PMIX_MAPBY, "ppr:1:node", PMIX_STRING);  // instruct the RM to launch one copy of the executable on each node
    PMIX_INFO_LOAD(&app[0].info[1], PMIX_DEBUGGER_DAEMONS, true, PMIX_BOOL); // these are debugger daemons
    PMIX_INFO_LOAD(&app[0].info[2], PMIX_DEBUG_TARGET, nspace, PMIX_STRING); // the "jobid" of the application to be debugged

    /* spawn the daemons */
    PMIx_Spawn(NULL, 0, app, napps, dspace);
    /* cleanup */
    PMIX_APP_FREE(app, napps);

    /* this is where a debugger tool would wait until the debug operation is complete */

    return 0;
#endif
}
Example #3
0
/* Xfer FUNCTIONS FOR GENERIC PMIX TYPES */
pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p,
                                          pmix_value_t *src)
{
    size_t n, m;
    pmix_status_t rc;
    char **prarray, **strarray;
    pmix_value_t *pv, *sv;
    pmix_info_t *p1, *s1;
    pmix_app_t *pa, *sa;
    pmix_pdata_t *pd, *sd;
    pmix_buffer_t *pb, *sb;
    pmix_byte_object_t *pbo, *sbo;
    pmix_kval_t *pk, *sk;
    pmix_modex_data_t *pm, *sm;
    pmix_proc_info_t *pi, *si;
    pmix_query_t *pq, *sq;
    pmix_envar_t *pe, *se;

    /* copy the right field */
    p->type = src->type;
    switch (src->type) {
    case PMIX_UNDEF:
    break;
    case PMIX_BOOL:
        p->data.flag = src->data.flag;
        break;
    case PMIX_BYTE:
        p->data.byte = src->data.byte;
        break;
    case PMIX_STRING:
        if (NULL != src->data.string) {
            p->data.string = strdup(src->data.string);
        } else {
            p->data.string = NULL;
        }
        break;
    case PMIX_SIZE:
        p->data.size = src->data.size;
        break;
    case PMIX_PID:
        p->data.pid = src->data.pid;
        break;
    case PMIX_INT:
        /* to avoid alignment issues */
        memcpy(&p->data.integer, &src->data.integer, sizeof(int));
        break;
    case PMIX_INT8:
        p->data.int8 = src->data.int8;
        break;
    case PMIX_INT16:
        /* to avoid alignment issues */
        memcpy(&p->data.int16, &src->data.int16, 2);
        break;
    case PMIX_INT32:
        /* to avoid alignment issues */
        memcpy(&p->data.int32, &src->data.int32, 4);
        break;
    case PMIX_INT64:
        /* to avoid alignment issues */
        memcpy(&p->data.int64, &src->data.int64, 8);
        break;
    case PMIX_UINT:
        /* to avoid alignment issues */
        memcpy(&p->data.uint, &src->data.uint, sizeof(unsigned int));
        break;
    case PMIX_UINT8:
        p->data.uint8 = src->data.uint8;
        break;
    case PMIX_UINT16:
        /* to avoid alignment issues */
        memcpy(&p->data.uint16, &src->data.uint16, 2);
        break;
    case PMIX_UINT32:
        /* to avoid alignment issues */
        memcpy(&p->data.uint32, &src->data.uint32, 4);
        break;
    case PMIX_UINT64:
        /* to avoid alignment issues */
        memcpy(&p->data.uint64, &src->data.uint64, 8);
        break;
    case PMIX_FLOAT:
        p->data.fval = src->data.fval;
        break;
    case PMIX_DOUBLE:
        p->data.dval = src->data.dval;
        break;
    case PMIX_TIMEVAL:
        memcpy(&p->data.tv, &src->data.tv, sizeof(struct timeval));
        break;
    case PMIX_TIME:
        memcpy(&p->data.time, &src->data.time, sizeof(time_t));
        break;
    case PMIX_STATUS:
        memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t));
        break;
    case PMIX_PROC:
        PMIX_PROC_CREATE(p->data.proc, 1);
        if (NULL == p->data.proc) {
            return PMIX_ERR_NOMEM;
        }
        memcpy(p->data.proc, src->data.proc, sizeof(pmix_proc_t));
        break;
    case PMIX_PROC_RANK:
        memcpy(&p->data.rank, &src->data.rank, sizeof(pmix_rank_t));
        break;
    case PMIX_BYTE_OBJECT:
    case PMIX_COMPRESSED_STRING:
        memset(&p->data.bo, 0, sizeof(pmix_byte_object_t));
        if (NULL != src->data.bo.bytes && 0 < src->data.bo.size) {
            p->data.bo.bytes = malloc(src->data.bo.size);
            memcpy(p->data.bo.bytes, src->data.bo.bytes, src->data.bo.size);
            p->data.bo.size = src->data.bo.size;
        } else {
            p->data.bo.bytes = NULL;
            p->data.bo.size = 0;
        }
        break;
    case PMIX_PERSIST:
        memcpy(&p->data.persist, &src->data.persist, sizeof(pmix_persistence_t));
        break;
    case PMIX_SCOPE:
        memcpy(&p->data.scope, &src->data.scope, sizeof(pmix_scope_t));
        break;
    case PMIX_DATA_RANGE:
        memcpy(&p->data.range, &src->data.range, sizeof(pmix_data_range_t));
        break;
    case PMIX_PROC_STATE:
        memcpy(&p->data.state, &src->data.state, sizeof(pmix_proc_state_t));
        break;
    case PMIX_PROC_INFO:
        PMIX_PROC_INFO_CREATE(p->data.pinfo, 1);
        if (NULL != src->data.pinfo->hostname) {
            p->data.pinfo->hostname = strdup(src->data.pinfo->hostname);
        }
        if (NULL != src->data.pinfo->executable_name) {
            p->data.pinfo->executable_name = strdup(src->data.pinfo->executable_name);
        }
        memcpy(&p->data.pinfo->pid, &src->data.pinfo->pid, sizeof(pid_t));
        memcpy(&p->data.pinfo->exit_code, &src->data.pinfo->exit_code, sizeof(int));
        memcpy(&p->data.pinfo->state, &src->data.pinfo->state, sizeof(pmix_proc_state_t));
        break;
    case PMIX_DATA_ARRAY:
        p->data.darray = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t));
        p->data.darray->type = src->data.darray->type;
        p->data.darray->size = src->data.darray->size;
        if (0 == p->data.darray->size || NULL == src->data.darray->array) {
            p->data.darray->array = NULL;
            p->data.darray->size = 0;
            break;
        }
        /* allocate space and do the copy */
        switch (src->data.darray->type) {
            case PMIX_UINT8:
            case PMIX_INT8:
            case PMIX_BYTE:
                p->data.darray->array = (char*)malloc(src->data.darray->size);
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size);
                break;
            case PMIX_UINT16:
            case PMIX_INT16:
                p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint16_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint16_t));
                break;
            case PMIX_UINT32:
            case PMIX_INT32:
                p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint32_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint32_t));
                break;
            case PMIX_UINT64:
            case PMIX_INT64:
                p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint64_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint64_t));
                break;
            case PMIX_BOOL:
                p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(bool));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(bool));
                break;
            case PMIX_SIZE:
                p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(size_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(size_t));
                break;
            case PMIX_PID:
                p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(pid_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pid_t));
                break;
            case PMIX_STRING:
                p->data.darray->array = (char**)malloc(src->data.darray->size * sizeof(char*));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                prarray = (char**)p->data.darray->array;
                strarray = (char**)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    if (NULL != strarray[n]) {
                        prarray[n] = strdup(strarray[n]);
                    }
                }
                break;
            case PMIX_INT:
            case PMIX_UINT:
                p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(int));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(int));
                break;
            case PMIX_FLOAT:
                p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(float));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(float));
                break;
            case PMIX_DOUBLE:
                p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(double));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(double));
                break;
            case PMIX_TIMEVAL:
                p->data.darray->array = (struct timeval*)malloc(src->data.darray->size * sizeof(struct timeval));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(struct timeval));
                break;
            case PMIX_TIME:
                p->data.darray->array = (time_t*)malloc(src->data.darray->size * sizeof(time_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(time_t));
                break;
            case PMIX_STATUS:
                p->data.darray->array = (pmix_status_t*)malloc(src->data.darray->size * sizeof(pmix_status_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_status_t));
                break;
            case PMIX_VALUE:
                PMIX_VALUE_CREATE(p->data.darray->array, src->data.darray->size);
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                pv = (pmix_value_t*)p->data.darray->array;
                sv = (pmix_value_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    if (PMIX_SUCCESS != (rc = pmix_value_xfer(&pv[n], &sv[n]))) {
                        PMIX_VALUE_FREE(pv, src->data.darray->size);
                        return rc;
                    }
                }
                break;
            case PMIX_PROC:
                PMIX_PROC_CREATE(p->data.darray->array, src->data.darray->size);
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_proc_t));
                break;
            case PMIX_APP:
                PMIX_APP_CREATE(p->data.darray->array, src->data.darray->size);
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                pa = (pmix_app_t*)p->data.darray->array;
                sa = (pmix_app_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    if (NULL != sa[n].cmd) {
                        pa[n].cmd = strdup(sa[n].cmd);
                    }
                    if (NULL != sa[n].argv) {
                        pa[n].argv = pmix_argv_copy(sa[n].argv);
                    }
                    if (NULL != sa[n].env) {
                        pa[n].env = pmix_argv_copy(sa[n].env);
                    }
                    if (NULL != sa[n].cwd) {
                        pa[n].cwd = strdup(sa[n].cwd);
                    }
                    pa[n].maxprocs = sa[n].maxprocs;
                    if (0 < sa[n].ninfo && NULL != sa[n].info) {
                        PMIX_INFO_CREATE(pa[n].info, sa[n].ninfo);
                        if (NULL == pa[n].info) {
                            PMIX_APP_FREE(pa, src->data.darray->size);
                            return PMIX_ERR_NOMEM;
                        }
                        pa[n].ninfo = sa[n].ninfo;
                        for (m=0; m < pa[n].ninfo; m++) {
                            PMIX_INFO_XFER(&pa[n].info[m], &sa[n].info[m]);
                        }
                    }
                }
                break;
            case PMIX_INFO:
                PMIX_INFO_CREATE(p->data.darray->array, src->data.darray->size);
                p1 = (pmix_info_t*)p->data.darray->array;
                s1 = (pmix_info_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    PMIX_INFO_XFER(&p1[n], &s1[n]);
                }
                break;
            case PMIX_PDATA:
                PMIX_PDATA_CREATE(p->data.darray->array, src->data.darray->size);
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                pd = (pmix_pdata_t*)p->data.darray->array;
                sd = (pmix_pdata_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    PMIX_PDATA_XFER(&pd[n], &sd[n]);
                }
                break;
            case PMIX_BUFFER:
                p->data.darray->array = (pmix_buffer_t*)malloc(src->data.darray->size * sizeof(pmix_buffer_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                pb = (pmix_buffer_t*)p->data.darray->array;
                sb = (pmix_buffer_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    PMIX_CONSTRUCT(&pb[n], pmix_buffer_t);
                    pmix_bfrops_base_copy_payload(&pb[n], &sb[n]);
                }
                break;
            case PMIX_BYTE_OBJECT:
            case PMIX_COMPRESSED_STRING:
                p->data.darray->array = (pmix_byte_object_t*)malloc(src->data.darray->size * sizeof(pmix_byte_object_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                pbo = (pmix_byte_object_t*)p->data.darray->array;
                sbo = (pmix_byte_object_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    if (NULL != sbo[n].bytes && 0 < sbo[n].size) {
                        pbo[n].size = sbo[n].size;
                        pbo[n].bytes = (char*)malloc(pbo[n].size);
                        memcpy(pbo[n].bytes, sbo[n].bytes, pbo[n].size);
                    } else {
                        pbo[n].bytes = NULL;
                        pbo[n].size = 0;
                    }
                }
                break;
            case PMIX_KVAL:
                p->data.darray->array = (pmix_kval_t*)calloc(src->data.darray->size , sizeof(pmix_kval_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                pk = (pmix_kval_t*)p->data.darray->array;
                sk = (pmix_kval_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    if (NULL != sk[n].key) {
                        pk[n].key = strdup(sk[n].key);
                    }
                    if (NULL != sk[n].value) {
                        PMIX_VALUE_CREATE(pk[n].value, 1);
                        if (NULL == pk[n].value) {
                            free(p->data.darray->array);
                            return PMIX_ERR_NOMEM;
                        }
                        if (PMIX_SUCCESS != (rc = pmix_value_xfer(pk[n].value, sk[n].value))) {
                            return rc;
                        }
                    }
                }
                break;
            case PMIX_MODEX:
                PMIX_MODEX_CREATE(p->data.darray->array, src->data.darray->size);
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                pm = (pmix_modex_data_t*)p->data.darray->array;
                sm = (pmix_modex_data_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    memcpy(&pm[n], &sm[n], sizeof(pmix_modex_data_t));
                    if (NULL != sm[n].blob && 0 < sm[n].size) {
                        pm[n].blob = (uint8_t*)malloc(sm[n].size);
                        if (NULL == pm[n].blob) {
                            return PMIX_ERR_NOMEM;
                        }
                        memcpy(pm[n].blob, sm[n].blob, sm[n].size);
                        pm[n].size = sm[n].size;
                    } else {
                        pm[n].blob = NULL;
                        pm[n].size = 0;
                    }
                }
                break;
            case PMIX_PERSIST:
                p->data.darray->array = (pmix_persistence_t*)malloc(src->data.darray->size * sizeof(pmix_persistence_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_persistence_t));
                break;
            case PMIX_POINTER:
                p->data.darray->array = (char**)malloc(src->data.darray->size * sizeof(char*));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                prarray = (char**)p->data.darray->array;
                strarray = (char**)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    prarray[n] = strarray[n];
                }
                break;
            case PMIX_SCOPE:
                p->data.darray->array = (pmix_scope_t*)malloc(src->data.darray->size * sizeof(pmix_scope_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_scope_t));
                break;
            case PMIX_DATA_RANGE:
                p->data.darray->array = (pmix_data_range_t*)malloc(src->data.darray->size * sizeof(pmix_data_range_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_data_range_t));
                break;
            case PMIX_COMMAND:
                p->data.darray->array = (pmix_cmd_t*)malloc(src->data.darray->size * sizeof(pmix_cmd_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_cmd_t));
                break;
            case PMIX_INFO_DIRECTIVES:
                p->data.darray->array = (pmix_info_directives_t*)malloc(src->data.darray->size * sizeof(pmix_info_directives_t));
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_info_directives_t));
                break;
            case PMIX_PROC_INFO:
                PMIX_PROC_INFO_CREATE(p->data.darray->array, src->data.darray->size);
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                pi = (pmix_proc_info_t*)p->data.darray->array;
                si = (pmix_proc_info_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    memcpy(&pi[n].proc, &si[n].proc, sizeof(pmix_proc_t));
                    if (NULL != si[n].hostname) {
                        pi[n].hostname = strdup(si[n].hostname);
                    } else {
                        pi[n].hostname = NULL;
                    }
                    if (NULL != si[n].executable_name) {
                        pi[n].executable_name = strdup(si[n].executable_name);
                    } else {
                        pi[n].executable_name = NULL;
                    }
                    pi[n].pid = si[n].pid;
                    pi[n].exit_code = si[n].exit_code;
                    pi[n].state = si[n].state;
                }
                break;
            case PMIX_DATA_ARRAY:
                PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED);
                return PMIX_ERR_NOT_SUPPORTED;  // don't support iterative arrays
            case PMIX_QUERY:
                PMIX_QUERY_CREATE(p->data.darray->array, src->data.darray->size);
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                pq = (pmix_query_t*)p->data.darray->array;
                sq = (pmix_query_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    if (NULL != sq[n].keys) {
                        pq[n].keys = pmix_argv_copy(sq[n].keys);
                    }
                    if (NULL != sq[n].qualifiers && 0 < sq[n].nqual) {
                        PMIX_INFO_CREATE(pq[n].qualifiers, sq[n].nqual);
                        if (NULL == pq[n].qualifiers) {
                            PMIX_QUERY_FREE(pq, src->data.darray->size);
                            return PMIX_ERR_NOMEM;
                        }
                        for (m=0; m < sq[n].nqual; m++) {
                            PMIX_INFO_XFER(&pq[n].qualifiers[m], &sq[n].qualifiers[m]);
                        }
                        pq[n].nqual = sq[n].nqual;
                    } else {
                        pq[n].qualifiers = NULL;
                        pq[n].nqual = 0;
                    }
                }
                break;
            case PMIX_ENVAR:
                PMIX_ENVAR_CREATE(p->data.darray->array, src->data.darray->size);
                if (NULL == p->data.darray->array) {
                    return PMIX_ERR_NOMEM;
                }
                pe = (pmix_envar_t*)p->data.darray->array;
                se = (pmix_envar_t*)src->data.darray->array;
                for (n=0; n < src->data.darray->size; n++) {
                    if (NULL != se[n].envar) {
                        pe[n].envar = strdup(se[n].envar);
                    }
                    if (NULL != se[n].value) {
                        pe[n].value = strdup(se[n].value);
                    }
                    pe[n].separator = se[n].separator;
                }
                break;
            default:
                return PMIX_ERR_UNKNOWN_DATA_TYPE;
        }
        break;
    case PMIX_POINTER:
        memcpy(&p->data.ptr, &src->data.ptr, sizeof(void*));
        break;
    case PMIX_ENVAR:
        PMIX_ENVAR_CONSTRUCT(&p->data.envar);
        if (NULL != src->data.envar.envar) {
            p->data.envar.envar = strdup(src->data.envar.envar);
        }
        if (NULL != src->data.envar.value) {
            p->data.envar.value = strdup(src->data.envar.value);
        }
        p->data.envar.separator = src->data.envar.separator;
        break;

    /**** DEPRECATED ****/
    case PMIX_INFO_ARRAY:
        p->data.array->size = src->data.array->size;
        if (0 < src->data.array->size) {
            p->data.array->array = (pmix_info_t*)malloc(src->data.array->size * sizeof(pmix_info_t));
            if (NULL == p->data.array->array) {
                return PMIX_ERR_NOMEM;
            }
            p1 = (pmix_info_t*)p->data.array->array;
            s1 = (pmix_info_t*)src->data.array->array;
            for (n=0; n < src->data.darray->size; n++) {
                PMIX_INFO_XFER(&p1[n], &s1[n]);
            }
        }
        break;
    /********************/
    default:
        pmix_output(0, "XFER-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)src->type);
        assert(0);
        return PMIX_ERROR;
    }
    return PMIX_SUCCESS;
}
Example #4
0
int main(int argc, char **argv)
{
    pmix_status_t rc;
    pmix_value_t *val;
    pmix_proc_t proc;
    pmix_info_t *info;
    size_t ninfo;
    pmix_query_t *query;
    size_t nq, n;
    myquery_data_t myquery_data;
    pid_t pid;
    pmix_status_t code = PMIX_ERR_JOB_TERMINATED;
    mylock_t mylock;
    myrel_t myrel;
    uint16_t localrank;
    char *target = NULL;

    pid = getpid();

    /* init us - since we were launched by the RM, our connection info
     * will have been provided at startup. */
    if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) {
        fprintf(stderr, "Debugger daemon: PMIx_tool_init failed: %d\n", rc);
        exit(0);
    }
    fprintf(stderr, "Debugger daemon ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, (unsigned long)pid);


    /* register our default event handler */
    DEBUG_CONSTRUCT_LOCK(&mylock);
    PMIx_Register_event_handler(NULL, 0, NULL, 0,
                                notification_fn, evhandler_reg_callbk, (void*)&mylock);
    DEBUG_WAIT_THREAD(&mylock);
    if (PMIX_SUCCESS != mylock.status) {
        rc = mylock.status;
        DEBUG_DESTRUCT_LOCK(&mylock);
        goto done;
    }
    DEBUG_DESTRUCT_LOCK(&mylock);

    /* get the nspace of the job we are to debug - it will be in our JOB info */
#ifdef PMIX_LOAD_PROCID
    PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD);
#else
    PMIX_PROC_CONSTRUCT(&proc);
    (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_KEYLEN);
    proc.rank = PMIX_RANK_WILDCARD;
#endif
    if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_DEBUG_JOB, NULL, 0, &val))) {
        fprintf(stderr, "[%s:%d:%lu] Failed to get job being debugged - error %s\n",
                myproc.nspace, myproc.rank,
                (unsigned long)pid, PMIx_Error_string(rc));
        goto done;
    }
    if (NULL == val || PMIX_STRING != val->type || NULL == val->data.string) {
        fprintf(stderr, "[%s:%d:%lu] Failed to get job being debugged - NULL data returned\n",
                myproc.nspace, myproc.rank, (unsigned long)pid);
        goto done;
    }
    /* save it for later */
    target = strdup(val->data.string);
    PMIX_VALUE_RELEASE(val);

    fprintf(stderr, "[%s:%d:%lu] Debugging %s\n", myproc.nspace, myproc.rank,
            (unsigned long)pid, target);

    /* get my local rank so I can determine which local proc is "mine"
     * to debug */
    val = NULL;
    if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_LOCAL_RANK, NULL, 0, &val))) {
        fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - error %s\n",
                myproc.nspace, myproc.rank,
                (unsigned long)pid, PMIx_Error_string(rc));
        goto done;
    }
    if (NULL == val) {
        fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - NULL data returned\n",
                myproc.nspace, myproc.rank, (unsigned long)pid);
        goto done;
    }
    if (PMIX_UINT16 != val->type) {
        fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - returned wrong type %s\n",
                myproc.nspace, myproc.rank, (unsigned long)pid, PMIx_Data_type_string(val->type));
        goto done;
    }
    /* save the data */
    localrank = val->data.uint16;
    PMIX_VALUE_RELEASE(val);
    fprintf(stderr, "[%s:%d:%lu] my local rank %d\n", myproc.nspace, myproc.rank,
            (unsigned long)pid, (int)localrank);

    /* register another handler specifically for when the target
     * job completes */
    DEBUG_CONSTRUCT_LOCK(&myrel.lock);
    myrel.nspace = strdup(proc.nspace);
    PMIX_INFO_CREATE(info, 2);
    PMIX_INFO_LOAD(&info[0], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER);
    /* only call me back when this specific job terminates */
    PMIX_LOAD_PROCID(&proc, target, PMIX_RANK_WILDCARD);
    PMIX_INFO_LOAD(&info[1], PMIX_EVENT_AFFECTED_PROC, &proc, PMIX_PROC);

    fprintf(stderr, "[%s:%d:%lu] registering for termination of %s\n", myproc.nspace, myproc.rank,
            (unsigned long)pid, proc.nspace);


    DEBUG_CONSTRUCT_LOCK(&mylock);
    PMIx_Register_event_handler(&code, 1, info, 2,
                                release_fn, evhandler_reg_callbk, (void*)&mylock);
    DEBUG_WAIT_THREAD(&mylock);
    if (PMIX_SUCCESS != mylock.status) {
        rc = mylock.status;
        DEBUG_DESTRUCT_LOCK(&mylock);
        PMIX_INFO_FREE(info, 2);
        goto done;
    }
    DEBUG_DESTRUCT_LOCK(&mylock);
    PMIX_INFO_FREE(info, 2);

    /* get our local proctable - for scalability reasons, we don't want to
     * have our "root" debugger process get the proctable for everybody and
     * send it out to us. So ask the local PMIx server for the pid's of
     * our local target processes */
    nq = 1;
    PMIX_QUERY_CREATE(query, nq);
    PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_LOCAL_PROC_TABLE);
    query[0].nqual = 1;
    PMIX_INFO_CREATE(query[0].qualifiers, 1);
    PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, target, PMIX_STRING);  // the nspace we are enquiring about
    /* setup the caddy to retrieve the data */
    DEBUG_CONSTRUCT_LOCK(&myquery_data.lock);
    myquery_data.info = NULL;
    myquery_data.ninfo = 0;
    /* execute the query */
    if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&myquery_data))) {
        fprintf(stderr, "PMIx_Query_info failed: %d\n", rc);
        goto done;
    }
    DEBUG_WAIT_THREAD(&myquery_data.lock);
    DEBUG_DESTRUCT_LOCK(&myquery_data.lock);
    PMIX_QUERY_FREE(query, nq);
    if (PMIX_SUCCESS != myquery_data.status) {
        rc = myquery_data.status;
        goto done;
    }

    fprintf(stderr, "[%s:%d:%lu] Local proctable received\n", myproc.nspace, myproc.rank, (unsigned long)pid);


    /* now that we have the proctable for our local processes, we can do our
     * magic debugger stuff and attach to them. We then send a "release" event
     * to them - i.e., it's the equivalent to setting the MPIR breakpoint. We
     * do this with the event notification system. For this example, we just
     * send it to all local procs of the job being debugged */
    (void)strncpy(proc.nspace, target, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;
    ninfo = 2;
    PMIX_INFO_CREATE(info, ninfo);
    PMIX_INFO_LOAD(&info[0], PMIX_EVENT_CUSTOM_RANGE, &proc, PMIX_PROC);  // deliver to the target nspace
    PMIX_INFO_LOAD(&info[1], PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL);  // deliver to the target nspace
    fprintf(stderr, "[%s:%u:%lu] Sending release\n", myproc.nspace, myproc.rank, (unsigned long)pid);
    rc = PMIx_Notify_event(PMIX_ERR_DEBUGGER_RELEASE,
                           NULL, PMIX_RANGE_CUSTOM,
                           info, ninfo, NULL, NULL);
    if (PMIX_SUCCESS != rc) {
        fprintf(stderr, "%s[%s:%u:%lu] Sending release failed with error %s(%d)\n",
                myproc.nspace, myproc.rank, (unsigned long)pid, PMIx_Error_string(rc), rc);
        goto done;
    }

    /* do some debugger magic while waiting for the job to terminate */
    DEBUG_WAIT_THREAD(&myrel.lock);

  done:
    if (NULL != target) {
        free(target);
    }
    /* finalize us */
    fprintf(stderr, "Debugger daemon ns %s rank %d pid %lu: Finalizing\n", myproc.nspace, myproc.rank, (unsigned long)pid);
    if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
        fprintf(stderr, "Debugger daemon ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
    } else {
        fprintf(stderr, "Debugger daemon ns %s rank %d pid %lu:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank, (unsigned long)pid);
    }
    fflush(stderr);
    return(0);
}