Beispiel #1
0
static void mca_oob_ud_module_fini (mca_oob_ud_peer_t **peer)
{
    opal_process_name_t key;
    void *node1, *node2;
    int rc;

    rc = opal_proc_table_get_first_key (&mca_oob_ud_module.peers, &key,
                                        (void **) peer, &node1, &node2);
    if (OPAL_SUCCESS == rc) {
        do {
            if (NULL != *peer) {
                mca_oob_ud_peer_release (*peer);
            }
            rc = opal_proc_table_get_next_key (&mca_oob_ud_module.peers, &key,
                                               (void **) peer, node1, &node1, node2, &node2);
        } while (OPAL_SUCCESS == rc);
    }

    opal_proc_table_remove_all(&mca_oob_ud_module.peers);

    OBJ_DESTRUCT(&mca_oob_ud_module.peers);

    return;
}
Beispiel #2
0
static void xcast_recv(int status, orte_process_name_t* sender,
                       opal_buffer_t* buffer, orte_rml_tag_t tg,
                       void* cbdata)
{
    opal_list_item_t *item;
    orte_namelist_t *nm;
    int ret, cnt;
    opal_buffer_t *relay, *rly;
    orte_daemon_cmd_flag_t command = ORTE_DAEMON_NULL_CMD;
    opal_buffer_t wireup;
    opal_byte_object_t *bo;
    int8_t flag;
    orte_job_t *jdata;
    orte_proc_t *rec;
    opal_list_t coll;
    orte_grpcomm_signature_t *sig;
    orte_rml_tag_t tag;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:direct:xcast:recv: with %d bytes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (int)buffer->bytes_used));

    /* we need a passthru buffer to send to our children */
    rly = OBJ_NEW(opal_buffer_t);
    opal_dss.copy_payload(rly, buffer);

    /* get the signature that we do not need */
    cnt=1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &sig, &cnt, ORTE_SIGNATURE))) {
        ORTE_ERROR_LOG(ret);
        ORTE_FORCED_TERMINATE(ret);
        return;
    }
    OBJ_RELEASE(sig);

    /* get the target tag */
    cnt=1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &tag, &cnt, ORTE_RML_TAG))) {
        ORTE_ERROR_LOG(ret);
        ORTE_FORCED_TERMINATE(ret);
        return;
    }

    /* setup a buffer we can pass to ourselves - this just contains
     * the initial message, minus the headers inserted by xcast itself */
    relay = OBJ_NEW(opal_buffer_t);
    opal_dss.copy_payload(relay, buffer);
    /* setup the relay list */
    OBJ_CONSTRUCT(&coll, opal_list_t);

    /* if this is headed for the daemon command processor,
     * then we first need to check for add_local_procs
     * as that command includes some needed wireup info */
    if (ORTE_RML_TAG_DAEMON == tag) {
        /* peek at the command */
        cnt=1;
        if (ORTE_SUCCESS == (ret = opal_dss.unpack(buffer, &command, &cnt, ORTE_DAEMON_CMD))) {
            /* if it is add_procs, then... */
            if (ORTE_DAEMON_ADD_LOCAL_PROCS == command ||
                ORTE_DAEMON_DVM_NIDMAP_CMD == command) {
                /* extract the byte object holding the daemonmap */
                cnt=1;
                if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &bo, &cnt, OPAL_BYTE_OBJECT))) {
                    ORTE_ERROR_LOG(ret);
                    goto relay;
                }

                /* update our local nidmap, if required - the decode function
                 * knows what to do - it will also free the bytes in the byte object
                 */
                if (ORTE_PROC_IS_HNP) {
                    /* no need - already have the info */
                    if (NULL != bo) {
                        if (NULL != bo->bytes) {
                            free(bo->bytes);
                        }
                        free(bo);
                    }
                } else {
                    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                         "%s grpcomm:direct:xcast updating daemon nidmap",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

                    if (ORTE_SUCCESS != (ret = orte_util_decode_daemon_nodemap(bo))) {
                        ORTE_ERROR_LOG(ret);
                        goto relay;
                    }
                }

                /* update the routing plan */
                orte_routed.update_routing_plan();

                /* see if we have wiring info as well */
                cnt=1;
                if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &flag, &cnt, OPAL_INT8))) {
                    ORTE_ERROR_LOG(ret);
                    goto relay;
                }

                if (ORTE_DAEMON_ADD_LOCAL_PROCS == command) {
                    OBJ_RELEASE(relay);
                    relay = OBJ_NEW(opal_buffer_t);
                    /* repack the command */
                    if (OPAL_SUCCESS != (ret = opal_dss.pack(relay, &command, 1, ORTE_DAEMON_CMD))) {
                        ORTE_ERROR_LOG(ret);
                        goto relay;
                    }
                    if (0 == flag) {
                        /* copy the remainder of the payload */
                        opal_dss.copy_payload(relay, buffer);
                        /* no - just return */
                        goto relay;
                    }
                }

                /* unpack the byte object */
                cnt=1;
                if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &bo, &cnt, OPAL_BYTE_OBJECT))) {
                    ORTE_ERROR_LOG(ret);
                    goto relay;
                }
                if (0 < bo->size) {
                    /* load it into a buffer */
                    OBJ_CONSTRUCT(&wireup, opal_buffer_t);
                    opal_dss.load(&wireup, bo->bytes, bo->size);
                    /* pass it for processing */
                    if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, &wireup))) {
                        ORTE_ERROR_LOG(ret);
                        OBJ_DESTRUCT(&wireup);
                        goto relay;
                    }
                    /* done with the wireup buffer - dump it */
                    OBJ_DESTRUCT(&wireup);
                }
                free(bo);
                if (ORTE_DAEMON_ADD_LOCAL_PROCS == command) {
                    /* copy the remainder of the payload */
                    opal_dss.copy_payload(relay, buffer);
                }
            }
        } else {
            ORTE_ERROR_LOG(ret);
            goto CLEANUP;
        }
    }

 relay:

    /* get the list of next recipients from the routed module */
    orte_routed.get_routing_list(&coll);

    /* if list is empty, no relay is required */
    if (opal_list_is_empty(&coll)) {
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:direct:send_relay - recipient list is empty!",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        OBJ_RELEASE(rly);
        goto CLEANUP;
    }

    /* send the message to each recipient on list, deconstructing it as we go */
    while (NULL != (item = opal_list_remove_first(&coll))) {
        nm = (orte_namelist_t*)item;

        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:direct:send_relay sending relay msg of %d bytes to %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)rly->bytes_used,
                             ORTE_NAME_PRINT(&nm->name)));
        OBJ_RETAIN(rly);
        /* check the state of the recipient - no point
         * sending to someone not alive
         */
        jdata = orte_get_job_data_object(nm->name.jobid);
        if (NULL == (rec = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, nm->name.vpid))) {
            opal_output(0, "%s grpcomm:direct:send_relay proc %s not found - cannot relay",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name));
            OBJ_RELEASE(rly);
            OBJ_RELEASE(item);
            continue;
        }
        if (ORTE_PROC_STATE_RUNNING < rec->state) {
            opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name));
            OBJ_RELEASE(rly);
            OBJ_RELEASE(item);
            continue;
        }
        if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(&nm->name, rly, ORTE_RML_TAG_XCAST,
                                                           orte_rml_send_callback, NULL))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(rly);
            OBJ_RELEASE(item);
            continue;
        }
        OBJ_RELEASE(item);
    }
    OBJ_RELEASE(rly);  // retain accounting

 CLEANUP:
    /* cleanup */
    OBJ_DESTRUCT(&coll);

    /* now send the relay buffer to myself for processing */
    if (ORTE_DAEMON_DVM_NIDMAP_CMD != command) {
        if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay, tag,
                                                           orte_rml_send_callback, NULL))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(relay);
        }
    }
}
Beispiel #3
0
int orte_ras_base_add_hosts(orte_job_t *jdata)
{
    int rc;
    opal_list_t nodes;
    int i;
    orte_app_context_t *app;

    /* construct a list to hold the results */
    OBJ_CONSTRUCT(&nodes, opal_list_t);
    
    /* Individual add-hostfile names, if given, are included
     * in the app_contexts for this job. We therefore need to
     * retrieve the app_contexts for the job, and then cycle
     * through them to see if anything is there. The parser will
     * add the nodes found in each add-hostfile to our list - i.e.,
     * the resulting list contains the UNION of all nodes specified
     * in add-hostfiles from across all app_contexts
     *
     * Note that any relative node syntax found in the add-hostfiles will
     * generate an error in this scenario, so only non-relative syntax
     * can be present
     */
    
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
        if (NULL != app->add_hostfile) {
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                 "%s ras:base:add_hosts checking add-hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 app->add_hostfile));
            
            /* hostfile was specified - parse it and add it to the list */
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                                   app->add_hostfile))) {
                ORTE_ERROR_LOG(rc);
                OBJ_DESTRUCT(&nodes);
                return rc;
            }
            /* now indicate that this app is to run across it */
            app->hostfile = app->add_hostfile;
            app->add_hostfile = NULL;
        }
    }

    /* We next check for and add any add-host options. Note this is
     * a -little- different than dash-host in that (a) we add these
     * nodes to the global pool regardless of what may already be there,
     * and (b) as a result, any job and/or app_context can access them.
     *
     * Note that any relative node syntax found in the add-host lists will
     * generate an error in this scenario, so only non-relative syntax
     * can be present
     */
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
        if (NULL != app->add_host) {
            if (4 < opal_output_get_verbosity(orte_ras_base_framework.framework_output)) {
                char *fff = opal_argv_join(app->add_host, ',');
                opal_output(0, "%s ras:base:add_hosts checking add-host %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fff);
                free(fff);
            }
            if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes,
                                                                    app->add_host))) {
                ORTE_ERROR_LOG(rc);
                OBJ_DESTRUCT(&nodes);
                return rc;
            }
            /* now indicate that this app is to run across them */
            app->dash_host = app->add_host;
            app->add_host = NULL;
        }
    }
    
    /* if something was found, we add that to our global pool */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
        }
        /* cleanup */
        OBJ_DESTRUCT(&nodes);
    }
    
    /* shall we display the results? */
    if (0 < opal_output_get_verbosity(orte_ras_base_framework.framework_output)) {
        orte_ras_base_display_alloc();
    }
    
    return ORTE_SUCCESS;
}
Beispiel #4
0
int ompi_mpi_finalize(void)
{
    int ret;
    static int32_t finalize_has_already_started = 0;
    opal_list_item_t *item;
    struct timeval ompistart, ompistop;
    ompi_rte_collective_t *coll;
    ompi_proc_t** procs;
    size_t nprocs;

    /* Be a bit social if an erroneous program calls MPI_FINALIZE in
       two different threads, otherwise we may deadlock in
       ompi_comm_free() (or run into other nasty lions, tigers, or
       bears) */

    if (! opal_atomic_cmpset_32(&finalize_has_already_started, 0, 1)) {
        /* Note that if we're already finalized, we cannot raise an
           MPI exception.  The best that we can do is write something
           to stderr. */
        char hostname[MAXHOSTNAMELEN];
        pid_t pid = getpid();
        gethostname(hostname, sizeof(hostname));

        opal_show_help("help-mpi-runtime.txt",
                       "mpi_finalize:invoked_multiple_times",
                       true, hostname, pid);
        return MPI_ERR_OTHER;
    }

    ompi_mpiext_fini();

    /* Per MPI-2:4.8, we have to free MPI_COMM_SELF before doing
       anything else in MPI_FINALIZE (to include setting up such that
       MPI_FINALIZED will return true). */

    if (NULL != ompi_mpi_comm_self.comm.c_keyhash) {
        ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_self,
                             ompi_mpi_comm_self.comm.c_keyhash);
        OBJ_RELEASE(ompi_mpi_comm_self.comm.c_keyhash);
        ompi_mpi_comm_self.comm.c_keyhash = NULL;
    }

    /* Proceed with MPI_FINALIZE */

    ompi_mpi_finalized = true;

    /* As finalize is the last legal MPI call, we are allowed to force the release
     * of the user buffer used for bsend, before going anywhere further.
     */
    (void)mca_pml_base_bsend_detach(NULL, NULL);

    nprocs = 0;
    procs = ompi_proc_all(&nprocs);
    MCA_PML_CALL(del_procs(procs, nprocs));
    free(procs);

#if OMPI_ENABLE_PROGRESS_THREADS == 0
    opal_progress_set_event_flag(OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK);
#endif

    /* Redo ORTE calling opal_progress_event_users_increment() during
       MPI lifetime, to get better latency when not using TCP */
    opal_progress_event_users_increment();

    /* check to see if we want timing information */
    if (ompi_enable_timing != 0 && 0 == OMPI_PROC_MY_NAME->vpid) {
        gettimeofday(&ompistart, NULL);
    }

    /* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across
       *all* connected processes.  This only means that all processes
       have to call it.  It does *not* mean that all connected
       processes need to synchronize (either directly or indirectly).  

       For example, it is quite easy to construct complicated
       scenarios where one job is "connected" to another job via
       transitivity, but have no direct knowledge of each other.
       Consider the following case: job A spawns job B, and job B
       later spawns job C.  A "connectedness" graph looks something
       like this:

           A <--> B <--> C

       So what are we *supposed* to do in this case?  If job A is
       still connected to B when it calls FINALIZE, should it block
       until jobs B and C also call FINALIZE?

       After lengthy discussions many times over the course of this
       project, the issue was finally decided at the Louisville Feb
       2009 meeting: no.

       Rationale:

       - "Collective" does not mean synchronizing.  It only means that
         every process call it.  Hence, in this scenario, every
         process in A, B, and C must call FINALIZE.

       - KEY POINT: if A calls FINALIZE, then it is erroneous for B or
         C to try to communicate with A again.

       - Hence, OMPI is *correct* to only effect a barrier across each
         jobs' MPI_COMM_WORLD before exiting.  Specifically, if A
         calls FINALIZE long before B or C, it's *correct* if A exits
         at any time (and doesn't notify B or C that it is exiting).

       - Arguably, if B or C do try to communicate with the now-gone
         A, OMPI should try to print a nice error ("you tried to
         communicate with a job that is already gone...") instead of
         segv or other Badness.  However, that is an *extremely*
         difficult problem -- sure, it's easy for A to tell B that it
         is finalizing, but how can A tell C?  A doesn't even know
         about C.  You'd need to construct a "connected" graph in a
         distributed fashion, which is fraught with race conditions,
         etc.

      Hence, our conclusion is: OMPI is *correct* in its current
      behavior (of only doing a barrier across its own COMM_WORLD)
      before exiting.  Any problems that occur are as a result of
      erroneous MPI applications.  We *could* tighten up the erroneous
      cases and ensure that we print nice error messages / don't
      crash, but that is such a difficult problem that we decided we
      have many other, much higher priority issues to handle that deal
      with non-erroneous cases. */

    /* wait for everyone to reach this point
       This is a grpcomm barrier instead of an MPI barrier because an
       MPI barrier doesn't ensure that all messages have been transmitted
       before exiting, so the possibility of a stranded message exists.
    */
    coll = OBJ_NEW(ompi_rte_collective_t);
    coll->id = ompi_process_info.peer_fini_barrier;
    coll->active = true;
    if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
        OMPI_ERROR_LOG(ret);
        return ret;
    }

    /* wait for barrier to complete */
    OMPI_LAZY_WAIT_FOR_COMPLETION(coll->active);
    OBJ_RELEASE(coll);

    /* check for timing request - get stop time and report elapsed
     time if so */
    if (ompi_enable_timing && 0 == OMPI_PROC_MY_NAME->vpid) {
        gettimeofday(&ompistop, NULL);
        opal_output(0, "ompi_mpi_finalize[%ld]: time to execute barrier %ld usec",
                    (long)OMPI_PROC_MY_NAME->vpid,
                    (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
                               (ompistop.tv_usec - ompistart.tv_usec)));
    }

    /*
     * Shutdown the Checkpoint/Restart Mech.
     */
    if (OMPI_SUCCESS != (ret = ompi_cr_finalize())) {
        OMPI_ERROR_LOG(ret);
    }

    /* Shut down any bindings-specific issues: C++, F77, F90 */

    /* Remove all memory associated by MPI_REGISTER_DATAREP (per
       MPI-2:9.5.3, there is no way for an MPI application to
       *un*register datareps, but we don't want the OMPI layer causing
       memory leaks). */
    while (NULL != (item = opal_list_remove_first(&ompi_registered_datareps))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&ompi_registered_datareps);

    /* Remove all F90 types from the hash tables. As the OBJ_DESTRUCT will
     * call a special destructor able to release predefined types, we can
     * simply call the OBJ_DESTRUCT on the hash table and all memory will
     * be correctly released.
     */
    OBJ_DESTRUCT( &ompi_mpi_f90_integer_hashtable );
    OBJ_DESTRUCT( &ompi_mpi_f90_real_hashtable );
    OBJ_DESTRUCT( &ompi_mpi_f90_complex_hashtable );

    /* Free communication objects */

    /* free file resources */
    if (OMPI_SUCCESS != (ret = ompi_file_finalize())) {
        return ret;
    }

    /* free window resources */
    if (OMPI_SUCCESS != (ret = ompi_win_finalize())) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = ompi_osc_base_finalize())) {
        return ret;
    }

    /* free pml resource */ 
    if(OMPI_SUCCESS != (ret = mca_pml_base_finalize())) { 
      return ret;
    }
    /* free communicator resources */
    if (OMPI_SUCCESS != (ret = ompi_comm_finalize())) {
        return ret;
    }

    /* free requests */
    if (OMPI_SUCCESS != (ret = ompi_request_finalize())) {
        return ret;
    }

    if (OMPI_SUCCESS != (ret = ompi_message_finalize())) {
        return ret;
    }

    /* If requested, print out a list of memory allocated by ALLOC_MEM
       but not freed by FREE_MEM */
    if (0 != ompi_debug_show_mpi_alloc_mem_leaks) {
        mca_mpool_base_tree_print();
    }

    /* Now that all MPI objects dealing with communications are gone,
       shut down MCA types having to do with communications */
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pml_base_framework) ) ) {
        OMPI_ERROR_LOG(ret);
        return ret;
    }

    /* shut down buffered send code */
    mca_pml_base_bsend_fini();

#if OPAL_ENABLE_FT_CR == 1
    /*
     * Shutdown the CRCP Framework, must happen after PML shutdown
     */
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_crcp_base_framework) ) ) {
        OMPI_ERROR_LOG(ret);
        return ret;
    }
#endif

    /* Free secondary resources */

    /* free attr resources */
    if (OMPI_SUCCESS != (ret = ompi_attr_finalize())) {
        return ret;
    }

    /* free group resources */
    if (OMPI_SUCCESS != (ret = ompi_group_finalize())) {
        return ret;
    }

    /* free proc resources */
    if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) {
        return ret;
    }
    
    /* finalize the pubsub functions */
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pubsub_base_framework) ) ) {
        return ret;
    }
    
    /* finalize the DPM framework */
    if ( OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_dpm_base_framework))) {
        return ret;
    }
    
    /* free internal error resources */
    if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) {
        return ret;
    }
     
    /* free error code resources */
    if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_finalize())) {
        return ret;
    }

    /* free errhandler resources */
    if (OMPI_SUCCESS != (ret = ompi_errhandler_finalize())) {
        return ret;
    }

    /* Free all other resources */

    /* free op resources */
    if (OMPI_SUCCESS != (ret = ompi_op_finalize())) {
        return ret;
    }

    /* free ddt resources */
    if (OMPI_SUCCESS != (ret = ompi_datatype_finalize())) {
        return ret;
    }

    /* free info resources */
    if (OMPI_SUCCESS != (ret = ompi_info_finalize())) {
        return ret;
    }

    /* Close down MCA modules */

    /* io is opened lazily, so it's only necessary to close it if it
       was actually opened */
    if (0 < ompi_io_base_framework.framework_refcnt) {
        /* May have been "opened" multiple times. We want it closed now */
        ompi_io_base_framework.framework_refcnt = 1;

        if (OMPI_SUCCESS != mca_base_framework_close(&ompi_io_base_framework)) {
            return ret;
        }
    }
    (void) mca_base_framework_close(&ompi_topo_base_framework);
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_osc_base_framework))) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_coll_base_framework))) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_bml_base_framework))) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_mpool_base_framework))) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_rcache_base_framework))) {
        return ret;
    }
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_allocator_base_framework))) {
        return ret;
    }

    if (NULL != ompi_mpi_main_thread) {
        OBJ_RELEASE(ompi_mpi_main_thread);
        ompi_mpi_main_thread = NULL;
    }

    /* Leave the RTE */

    if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) {
        return ret;
    }

    /* now close the rte framework */
    if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_rte_base_framework) ) ) {
        OMPI_ERROR_LOG(ret);
        return ret;
    }

    if (OPAL_SUCCESS != (ret = opal_finalize_util())) {
        return ret;
    }

    /* All done */

    return MPI_SUCCESS;
}
Beispiel #5
0
static void des(ompi_orte_tracker_t *p)
{
    OBJ_DESTRUCT(&p->lock);
    OBJ_DESTRUCT(&p->cond);
}
Beispiel #6
0
static int s1_init(void)
{
    PMI_BOOL initialized;
    int spawned;
    int rc, ret = OPAL_ERROR;
    int i, rank, lrank, nrank;
    char *pmix_id, tmp[64];
    opal_value_t kv;
    char *str;
    uint32_t ui32;
    opal_process_name_t ldr;
    char **localranks=NULL;

    if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
        OPAL_PMI_ERROR(rc, "PMI_Initialized");
        return OPAL_ERROR;
    }

    if (PMI_TRUE != initialized && PMI_SUCCESS != (rc = PMI_Init(&spawned))) {
        OPAL_PMI_ERROR(rc, "PMI_Init");
        return OPAL_ERROR;
    }

    // setup hash table
    opal_pmix_base_hash_init();

    // Initialize space demands
    rc = PMI_KVS_Get_value_length_max(&pmix_vallen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_value_length_max");
        goto err_exit;
    }
    pmix_vallen_threshold = pmix_vallen_max * 3;
    pmix_vallen_threshold >>= 2;

    rc = PMI_KVS_Get_name_length_max(&pmix_kvslen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
        goto err_exit;
    }

    rc = PMI_KVS_Get_key_length_max(&pmix_keylen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
        goto err_exit;
    }

    // Initialize job environment information
    pmix_id = (char*)malloc(pmix_vallen_max);
    if (pmix_id == NULL) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }
    /* Get domain id */
    if (PMI_SUCCESS != (rc = PMI_Get_kvs_domain_id(pmix_id, pmix_vallen_max))) {
        free(pmix_id);
        goto err_exit;
    }

    /* get our rank */
    ret = PMI_Get_rank(&rank);
    if( PMI_SUCCESS != ret ) {
        OPAL_PMI_ERROR(ret, "PMI_Get_rank");
        free(pmix_id);
        goto err_exit;
    }

    /* Slurm PMI provides the job id as an integer followed
     * by a '.', followed by essentially a stepid. The first integer
     * defines an overall job number. The second integer is the number of
     * individual jobs we have run within that allocation. */
    s1_pname.jobid = strtoul(pmix_id, &str, 10);
    s1_pname.jobid = (s1_pname.jobid << 16) & 0xffff0000;
    if (NULL != str) {
        ui32 = strtoul(str, NULL, 10);
        s1_pname.jobid |= (ui32 & 0x0000ffff);
    }
    ldr.jobid = s1_pname.jobid;
    s1_pname.vpid = rank;
    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    opal_proc_set_name(&s1_pname);
    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s1: assigned tmp name",
                        OPAL_NAME_PRINT(s1_pname));

    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOBID);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = s1_pname.jobid;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save it */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_RANK);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = rank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
    if (pmix_kvs_name == NULL) {
        ret = OPAL_ERR_OUT_OF_RESOURCE;
        goto err_exit;
    }

    rc = PMI_KVS_Get_my_name(pmix_kvs_name, pmix_kvslen_max);
    if (PMI_SUCCESS != rc) {
        OPAL_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
        goto err_exit;
    }

    /* get our local proc info to find our local rank */
    if (PMI_SUCCESS != (rc = PMI_Get_clique_size(&nlranks))) {
        OPAL_PMI_ERROR(rc, "PMI_Get_clique_size");
        return rc;
    }
    /* save the local size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = nlranks;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    lrank = 0;
    nrank = 0;
    ldr.vpid = rank;
    if (0 < nlranks) {
        /* now get the specific ranks */
        lranks = (int*)calloc(nlranks, sizeof(int));
        if (NULL == lranks) {
            rc = OPAL_ERR_OUT_OF_RESOURCE;
            OPAL_ERROR_LOG(rc);
            return rc;
        }
        if (PMI_SUCCESS != (rc = PMI_Get_clique_ranks(lranks, nlranks))) {
            OPAL_PMI_ERROR(rc, "PMI_Get_clique_ranks");
            free(lranks);
            return rc;
        }
        /* note the local ldr */
        ldr.vpid = lranks[0];
        /* save this */
        memset(tmp, 0, 64);
        for (i=0; i < nlranks; i++) {
            (void)snprintf(tmp, 64, "%d", lranks[i]);
            opal_argv_append_nosize(&localranks, tmp);
            if (rank == lranks[i]) {
                lrank = i;
                nrank = i;
            }
        }
        str = opal_argv_join(localranks, ',');
        opal_argv_free(localranks);
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_PMIX_LOCAL_PEERS);
        kv.type = OPAL_STRING;
        kv.data.string = str;
        if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
            OPAL_ERROR_LOG(ret);
            OBJ_DESTRUCT(&kv);
            goto err_exit;
        }
        OBJ_DESTRUCT(&kv);
    }

    /* save the local leader */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCALLDR);
    kv.type = OPAL_UINT64;
    kv.data.uint64 = *(uint64_t*)&ldr;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* save our local rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = lrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);
    /* and our node rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_NODE_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = nrank;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get universe size */
    ret = PMI_Get_universe_size(&i);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_universe_size");
        goto err_exit;
    }
    /* push this into the dstore for subsequent fetches */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = i;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get job size */
    ret = PMI_Get_size(&i);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_size");
        goto err_exit;
    }
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOB_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = i;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* get appnum */
    ret = PMI_Get_appnum(&i);
    if (PMI_SUCCESS != ret) {
        OPAL_PMI_ERROR(ret, "PMI_Get_appnum");
        goto err_exit;
    }
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_APPNUM);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = i;
    if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(ret);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

   /* increment the init count */
    ++pmix_init_count;

    return OPAL_SUCCESS;

 err_exit:
    PMI_Finalize();
    return ret;
}
Beispiel #7
0
int
main(int argc, char *argv[]){
    int count;
    int msgsize;
    uint8_t *msg;
    int i, j, rc;
    orte_process_name_t peer;
    double maxpower;
    opal_buffer_t *buf;
    orte_rml_recv_cb_t blob;

    /*
     * Init
     */
    orte_init(&argc, &argv, ORTE_PROC_NON_MPI);

    if (argc > 1) {
        count = atoi(argv[1]);
        if (count < 0) {
            count = INT_MAX-1;
        }
    } else {
        count = MAX_COUNT;
    }

    peer.jobid = ORTE_PROC_MY_NAME->jobid;
    peer.vpid = ORTE_PROC_MY_NAME->vpid + 1;
    if (peer.vpid == orte_process_info.num_procs) {
        peer.vpid = 0;
    }

    for (j=1; j < count+1; j++) {
        /* rank0 starts ring */
        if (ORTE_PROC_MY_NAME->vpid == 0) {
            /* setup the initiating buffer - put random sized message in it */
            buf = OBJ_NEW(opal_buffer_t);

            maxpower = (double)(j%7);
            msgsize = (int)pow(10.0, maxpower);
            opal_output(0, "Ring %d message size %d bytes", j, msgsize);
            msg = (uint8_t*)malloc(msgsize);
            opal_dss.pack(buf, msg, msgsize, OPAL_BYTE);
            free(msg);
            orte_rml.send_buffer_nb(&peer, buf, MY_TAG, orte_rml_send_callback, NULL);

            /* wait for it to come around */
            OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
            blob.active = true;
            orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
                                    ORTE_RML_NON_PERSISTENT,
                                    orte_rml_recv_callback, &blob);
            ORTE_WAIT_FOR_COMPLETION(blob.active);
            OBJ_DESTRUCT(&blob);

            opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
        } else {
            /* wait for msg */
            OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
            blob.active = true;
            orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
                                    ORTE_RML_NON_PERSISTENT,
                                    orte_rml_recv_callback, &blob);
            ORTE_WAIT_FOR_COMPLETION(blob.active);

            opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, ORTE_NAME_PRINT(&blob.name));

            /* send it along */
            buf = OBJ_NEW(opal_buffer_t);
            opal_dss.copy_payload(buf, &blob.data);
            OBJ_DESTRUCT(&blob);
            msg_active = true;
            orte_rml.send_buffer_nb(&peer, buf, MY_TAG, send_callback, NULL);
            ORTE_WAIT_FOR_COMPLETION(msg_active);
        }
    }

    orte_finalize();

    return 0;
}
/* ////////////////////////////////////////////////////////////////////////// */
static int
verbs_runtime_query(mca_base_module_t **module,
                    int *priority,
                    const char *hint)
{
    int rc = OSHMEM_SUCCESS;
    openib_device_t my_device;
    openib_device_t *device = &my_device;
    int num_devs = 0;
    int i = 0;

    *priority = 0;
    *module = NULL;

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
    #error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs) {
        return OSHMEM_ERR_NOT_SUPPORTED;
    }

    /* Open device */
    if (NULL != mca_sshmem_verbs_component.hca_name) {
        for (i = 0; i < num_devs; i++) {
            if (0 == strcmp(mca_sshmem_verbs_component.hca_name, ibv_get_device_name(device->ib_devs[i]))) {
                device->ib_dev = device->ib_devs[i];
                break;
            }
        }
    } else {
        device->ib_dev = device->ib_devs[0];
    }

    if (NULL == device->ib_dev) {
        rc = OSHMEM_ERR_NOT_FOUND;
        goto out;
    }

    if (NULL == (device->ib_dev_context = ibv_open_device(device->ib_dev))) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Obtain device attributes */
    if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Allocate the protection domain for the device */
    device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
    if (NULL == device->ib_pd) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Allocate memory */
    if (!rc) {
        void *addr = NULL;
        size_t size = getpagesize();
        struct ibv_mr *ib_mr = NULL;
        uint64_t access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ; 
        uint64_t exp_access_flag = 0;

        OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
        opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        exp_access_flag = IBV_EXP_ACCESS_ALLOCATE_MR  |
                          IBV_EXP_ACCESS_SHARED_MR_USER_READ |
                          IBV_EXP_ACCESS_SHARED_MR_USER_WRITE; 
#endif /* MPAGE_ENABLE */

        struct ibv_exp_reg_mr_in in = {device->ib_pd, addr, size, access_flag|exp_access_flag, 0};
        ib_mr = ibv_exp_reg_mr(&in);
        if (NULL == ib_mr) {
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        } else {
            device->ib_mr_shared = ib_mr;
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        }

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        if (!rc) {
            struct ibv_exp_reg_shared_mr_in in_smr;

            access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ|
                          IBV_EXP_ACCESS_NO_RDMA;

            addr = (void *)mca_sshmem_base_start_address;
            mca_sshmem_verbs_fill_shared_mr(&in_smr, device->ib_pd, device->ib_mr_shared->handle,  addr, access_flag);
            ib_mr = ibv_exp_reg_shared_mr(&in_smr);
            if (NULL == ib_mr) {
                mca_sshmem_verbs_component.has_shared_mr = 0;
            } else {
                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
                mca_sshmem_verbs_component.has_shared_mr = 1;
            }
        }
#endif /* MPAGE_ENABLE */
    }

    /* all is well - rainbows and butterflies */
    if (!rc) {
        *priority = mca_sshmem_verbs_component.priority;
        *module = (mca_base_module_t *)&mca_sshmem_verbs_module.super;
    }

out:
    if (device) {
        if (opal_value_array_get_size(&device->ib_mr_array)) {
            struct ibv_mr** array;
            struct ibv_mr* ib_mr = NULL;
            array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);
            while (opal_value_array_get_size(&device->ib_mr_array) > 0) {
                ib_mr = array[0];
                ibv_dereg_mr(ib_mr);
                opal_value_array_remove_item(&device->ib_mr_array, 0);
            }

            if (device->ib_mr_shared) {
                device->ib_mr_shared = NULL;
            }
            OBJ_DESTRUCT(&device->ib_mr_array);
        }

        if (device->ib_pd) {
            ibv_dealloc_pd(device->ib_pd);
            device->ib_pd = NULL;
        }

        if(device->ib_dev_context) {
            ibv_close_device(device->ib_dev_context);
            device->ib_dev_context = NULL;
        }

        if(device->ib_devs) {
            ibv_free_device_list(device->ib_devs);
            device->ib_devs = NULL;
        }
    }

    return rc;
}
static int discover(opal_list_t* nodelist, ICluster* pCluster)
{
    int ret = ORTE_ERROR;
    int32_t nodeid;
    orte_node_t *node;
    opal_list_item_t* item;
    opal_list_t new_nodes;
    struct timeval start, stop;

    HRESULT hr = S_OK;  
    long idle_processors = 0;
    IClusterEnumerable* pNodesCollection = NULL;
    IEnumVARIANT* pNodes = NULL;
    INode* pNode = NULL;
    BSTR node_name = NULL, node_arch = NULL;
    VARIANT var;
    NodeStatus Status;
    size_t len;

    /* check for timing request - get start time if so */
    if (orte_timing) {
        gettimeofday(&start, NULL);
    }
    
    /* Get the collection of nodes. */
    hr = pCluster->get_ComputeNodes(&pNodesCollection);
    if (FAILED(hr)) {
        ras_get_cluster_message(pCluster);
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                            "ras:ccp:pCluster->get_ComputeNodes failed."));
        return ORTE_ERROR;
    }

    /* Get the enumerator used to iterate through the collection. */
    hr = pNodesCollection->GetEnumerator(&pNodes);
    if (FAILED(hr)) {
        ras_get_cluster_message(pCluster);
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                            "ras:ccp:pNodesCollection->GetEnumerator failed."));
        return ORTE_ERROR;
    }

    VariantInit(&var);

    /* Construct new node list. */
    OBJ_CONSTRUCT(&new_nodes, opal_list_t);
    nodeid=0;

    /* Loop through the collection. */
    while (hr = pNodes->Next(1, &var, NULL) == S_OK) {
        var.pdispVal->QueryInterface(IID_INode, reinterpret_cast<void **> (&pNode));

        /* Check wether the node is ready. 
         * There are four states:
         *          NodeStatus_Ready = 0,
         *          NodeStatus_Paused = 1,
         *          NodeStatus_Unreachable = 2, probably not a windows cluster node.
         *          NodeStatus_PendingApproval = 3
         */
        hr = pNode->get_Status(&Status);
        if (FAILED(hr)) {
            OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                "ras:ccp:pNode->get_Status failed."));
            ret = ORTE_ERROR;
            goto cleanup;
        }

        /* Get available number of processors on each node. */
        hr = pNode->get_NumberOfIdleProcessors(&idle_processors);
        if (FAILED(hr)) {
            OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                "ras:ccp:pNode->get_NumberOfIdleProcessors failed."));
            ret = ORTE_ERROR;
            goto cleanup;
        }

        /* Do we have enough processors on the available nodes? 
         * Question: How do we get the required number of processors?
         */
        if ( (Status == NodeStatus_Ready) && (idle_processors > 0) ) {

            /* Get node name. */
            hr = pNode->get_Name(&node_name);
            if (FAILED(hr)) {
                OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                    "ras:ccp:pNode->get_Name failed."));
                ret = ORTE_ERROR;
                goto cleanup;
            }

            /* Get node processor architecture. */
            hr = pNode->get_ProcessorArchitecture(&node_arch);
            if (FAILED(hr)) {
                OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                    "ras:ccp:pNode->get_ProcessorArchitecture failed."));
                ret = ORTE_ERROR;
                goto cleanup;
            }

            /* Prevent duplicated nodes in the list*/
            for (item = opal_list_get_first(&new_nodes);
                 opal_list_get_end(&new_nodes) != item;
                 item = opal_list_get_next(item)) {

                node = (orte_node_t*) item;
                if (0 == strcmp(node->name, (char *)node_name)) {
                    ++node->slots;
                    OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                        "ras:ccp:allocate:discover: found -- bumped slots to %d",
                                        node->slots));
                    break;
                }
            }
            /* Did we find it? */

            if (opal_list_get_end(&new_nodes) == item) {

                /* Nope -- didn't find it, so add a new item to the list */

                OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                    "ras:ccp:allocate:discover: not found -- added to list"));

                node = OBJ_NEW(orte_node_t);

                /* The function _dupenv_s is much safer than getenv on Windows. */
                _dupenv_s(&node->username, &len, "username");

                node->name = _com_util::ConvertBSTRToString(node_name);
                node->launch_id = nodeid;
                node->slots_inuse = 0;
                node->slots_max = 0;
                node->slots = 1;
                opal_list_append(nodelist, &node->super);
            } 
            /* up the nodeid */
            nodeid++;
        }

        pNode->Release();
        VariantClear(&var);
    }

    pNodes->Release();

    if (nodeid > 0) ret = ORTE_SUCCESS;

    /* All done */
cleanup:

    if (ORTE_SUCCESS == ret) {
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                            "ras:ccp:allocate:discover: success"));
    } else {
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                            "ras:ccp:allocate:discover: failed (rc=%d)", ret));
    }

    OBJ_DESTRUCT(&new_nodes);
    SysFreeString(node_name);
    SysFreeString(node_arch);

    /* check for timing request - get stop time and report elapsed time if so */
    if (orte_timing) {
        gettimeofday(&stop, NULL);
        opal_output(0, "ras_ccp: time to allocate is %ld usec",
                    (long int)((stop.tv_sec - start.tv_sec)*1000000 +
                               (stop.tv_usec - start.tv_usec)));
        gettimeofday(&start, NULL);
    }
    
    return ret;
}
int vprotocol_pessimist_event_logger_connect(int el_rank, ompi_communicator_t **el_comm)
{
    int rc;
    opal_buffer_t buffer;
    char *port;
    orte_process_name_t el_proc;
    char *hnp_uri, *rml_uri;
    orte_rml_tag_t el_tag;
    char name[MPI_MAX_PORT_NAME];
    int rank;
    vprotocol_pessimist_clock_t connect_info[2];
    
    snprintf(name, MPI_MAX_PORT_NAME, VPROTOCOL_EVENT_LOGGER_NAME_FMT, el_rank);
    port = ompi_pubsub.lookup(name, MPI_INFO_NULL);
    if(NULL == port)
    {
        return OMPI_ERR_NOT_FOUND;
    }
    V_OUTPUT_VERBOSE(45, "Found port < %s >", port);
    
    /* separate the string into the HNP and RML URI and tag */
    if (OMPI_SUCCESS != (rc = ompi_dpm.parse_port(port, &hnp_uri, &rml_uri, &el_tag))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    /* extract the originating proc's name */
    if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(rml_uri, &el_proc, NULL))) {
        ORTE_ERROR_LOG(rc);
        free(rml_uri); free(hnp_uri);
        return rc;
    }
    /* make sure we can route rml messages to the destination */
    if (OMPI_SUCCESS != (rc = ompi_dpm.route_to_port(hnp_uri, &el_proc))) {
        ORTE_ERROR_LOG(rc);
        free(rml_uri); free(hnp_uri);
        return rc;
    }
    free(rml_uri); free(hnp_uri);
    
    /* Send an rml message to tell the remote end to wake up and jump into 
     * connect/accept */
    OBJ_CONSTRUCT(&buffer, opal_buffer_t);
    rc = orte_rml.send_buffer(&el_proc, &buffer, el_tag+1, 0);
    if(ORTE_SUCCESS > rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&buffer);        
        return rc;
    }
    OBJ_DESTRUCT(&buffer);

    rc = ompi_dpm.connect_accept(MPI_COMM_SELF, 0, port, true, el_comm);
    if(OMPI_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
    
    /* Send Rank, receive max buffer size and max_clock back */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    rc = mca_pml_v.host_pml.pml_send(&rank, 1, MPI_INTEGER, 0, 
                                     VPROTOCOL_PESSIMIST_EVENTLOG_NEW_CLIENT_CMD,
                                     MCA_PML_BASE_SEND_STANDARD, 
                                     mca_vprotocol_pessimist.el_comm);
    if(OPAL_UNLIKELY(MPI_SUCCESS != rc))
        OMPI_ERRHANDLER_INVOKE(mca_vprotocol_pessimist.el_comm, rc,
                               __FILE__ ": failed sending event logger handshake");
    rc = mca_pml_v.host_pml.pml_recv(&connect_info, 2, MPI_UNSIGNED_LONG_LONG, 
                                     0, VPROTOCOL_PESSIMIST_EVENTLOG_NEW_CLIENT_CMD,
                                     mca_vprotocol_pessimist.el_comm, MPI_STATUS_IGNORE);
    if(OPAL_UNLIKELY(MPI_SUCCESS != rc))                                  \
        OMPI_ERRHANDLER_INVOKE(mca_vprotocol_pessimist.el_comm, rc,       \
                               __FILE__ ": failed receiving event logger handshake");   
    
    return rc;
}
Beispiel #11
0
static void ofacm_base_proc_destructor (opal_common_ofacm_base_proc_t *proc)
{
    OBJ_DESTRUCT(&proc->all_contexts);
}
Beispiel #12
0
int orte_util_get_ordered_host_list(opal_list_t *nodes,
                                    char *hostfile)
{
    opal_list_t exclude;
    opal_list_item_t *item, *itm, *item2, *item1;
    char *cptr;
    int num_empty, i, nodeidx, startempty=0;
    bool want_all_empty=false;
    orte_node_t *node_from_pool, *newnode;
    int rc;
    
    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                         "%s hostfile: creating ordered list of hosts from hostfile %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
    
    OBJ_CONSTRUCT(&exclude, opal_list_t);
    
    /* parse the hostfile and add the contents to the list, keeping duplicates */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, true))) {
        goto cleanup;
    }
    
    /* parse the nodes to process any relative node directives */
    item2 = opal_list_get_first(nodes);
    while (item2 != opal_list_get_end(nodes)) {
        orte_node_t *node=(orte_node_t*)item2;
        
        /* save the next location in case this one gets removed */
        item1 = opal_list_get_next(item2);
        
        if ('+' != node->name[0]) {
            item2 = item1;
            continue;
        }
        
        /* see if we specified empty nodes */
        if ('e' == node->name[1] ||
            'E' == node->name[1]) {
            /* request for empty nodes - do they want
             * all of them?
             */
            if (NULL != (cptr = strchr(node->name, ':'))) {
                /* the colon indicates a specific # are requested */
                cptr++; /* step past : */
                num_empty = strtol(cptr, NULL, 10);
            } else {
                /* want them all - set num_empty to max */
                num_empty = INT_MAX;
                want_all_empty = true;
            }
            /* insert empty nodes into newnodes list in place of the current item.
             * since item1 is the next item, we insert in front of it
             */
            if (!orte_hnp_is_allocated && 0 == startempty) {
               startempty = 1;
            }
            for (i=startempty; 0 < num_empty && i < orte_node_pool->size; i++) {
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (0 == node_from_pool->slots_inuse) {
                    newnode = OBJ_NEW(orte_node_t);
                    newnode->name = strdup(node_from_pool->name);
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node->slots < node_from_pool->slots) {
                        newnode->slots = node->slots;
                    } else {
                        newnode->slots = node_from_pool->slots;
                    }
                    opal_list_insert_pos(nodes, item1, &newnode->super);
                    /* track number added */
                    --num_empty;
                }
            }
            /* bookmark where we stopped in case they ask for more */
            startempty = i;
            /* did they get everything they wanted? */
            if (!want_all_empty && 0 < num_empty) {
                orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                               true, num_empty);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }            
            /* since we have expanded the provided node, remove
             * it from list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else if ('n' == node->name[1] ||
                   'N' == node->name[1]) {
            /* they want a specific relative node #, so
             * look it up on global pool
             */
            nodeidx = strtol(&node->name[2], NULL, 10);
            /* if the HNP is not allocated, then we need to
             * adjust the index as the node pool is offset
             * by one
             */
            if (!orte_hnp_is_allocated) {
                nodeidx++;
            }
            /* see if that location is filled */
            if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                /* this is an error */
                orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                               true, nodeidx, node->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
            /* create the node object */
            newnode = OBJ_NEW(orte_node_t);
            newnode->name = strdup(node_from_pool->name);
            /* if the slot count here is less than the
             * total slots avail on this node, set it
             * to the specified count - this allows people
             * to subdivide an allocation
             */
            if (node->slots < node_from_pool->slots) {
                newnode->slots = node->slots;
            } else {
                newnode->slots = node_from_pool->slots;
            }
            /* insert it before item1 */
            opal_list_insert_pos(nodes, item1, &newnode->super);
            /* since we have expanded the provided node, remove
             * it from list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else {
            /* invalid relative node syntax */
            orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                           true, node->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
        
        /* move to next */
        item2 = item1;
    }

    /* remove from the list of nodes those that are in the exclude list */
    while(NULL != (item = opal_list_remove_first(&exclude))) {
        orte_node_t *exnode = (orte_node_t*)item;
        /* check for matches on nodes */
        for (itm = opal_list_get_first(nodes);
             itm != opal_list_get_end(nodes);
             itm = opal_list_get_next(itm)) {
            orte_node_t *node=(orte_node_t*)itm;
            if (0 == strcmp(exnode->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(nodes, itm);
                OBJ_RELEASE(itm);
                /* have to cycle through the entire list as we could
                 * have duplicates
                 */
            }
        }
        OBJ_RELEASE(item);
    }
    
cleanup:
    OBJ_DESTRUCT(&exclude);
    
    return rc;
}
Beispiel #13
0
/* Parse the provided hostfile and filter the nodes that are
 * on the input list, removing those that
 * are not found in the hostfile
 */
int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
                                    char *hostfile,
                                    bool remove)
{
    opal_list_t newnodes, exclude;
    opal_list_item_t *item1, *item2, *next, *item3;
    orte_node_t *node_from_list, *node_from_file, *node_from_pool, *node3;
    int rc = ORTE_SUCCESS;
    char *cptr;
    int num_empty, nodeidx;
    bool want_all_empty = false;
    opal_list_t keep;
    bool found;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                        "%s hostfile: filtering nodes through hostfile %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    /* parse the hostfile and create local list of findings */
    OBJ_CONSTRUCT(&newnodes, opal_list_t);
    OBJ_CONSTRUCT(&exclude, opal_list_t);
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, &newnodes, &exclude, false))) {
        OBJ_DESTRUCT(&newnodes);
        OBJ_DESTRUCT(&exclude);
        return rc;
    }
    
    /* if the hostfile was empty, then treat it as a no-op filter */
    if (0 == opal_list_get_size(&newnodes)) {
        OBJ_DESTRUCT(&newnodes);
        OBJ_DESTRUCT(&exclude);
        /* indicate that the hostfile was empty */
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    /* remove from the list of newnodes those that are in the exclude list
     * since we could have added duplicate names above due to the */
    while (NULL != (item1 = opal_list_remove_first(&exclude))) {
        node_from_file = (orte_node_t*)item1;
        /* check for matches on nodes */
        for (item2 = opal_list_get_first(&newnodes);
             item2 != opal_list_get_end(&newnodes);
             item2 = opal_list_get_next(item2)) {
            orte_node_t *node = (orte_node_t*)item2;
            if (0 == strcmp(node_from_file->name, node->name)) {
                /* match - remove it */
                opal_output(0, "HOST %s ON EXCLUDE LIST - REMOVING", node->name);
                opal_list_remove_item(&newnodes, item2);
                OBJ_RELEASE(item2);
                break;
            }
        }
        OBJ_RELEASE(item1);
    }
    
    /* now check our nodes and keep or mark those that match. We can
     * destruct our hostfile list as we go since this won't be needed
     */
    OBJ_CONSTRUCT(&keep, opal_list_t);
    while (NULL != (item2 = opal_list_remove_first(&newnodes))) {
        node_from_file = (orte_node_t*)item2;
        
        next = opal_list_get_next(item2);
        
        /* see if this is a relative node syntax */
        if ('+' == node_from_file->name[0]) {
            /* see if we specified empty nodes */
            if ('e' == node_from_file->name[1] ||
                'E' == node_from_file->name[1]) {
                /* request for empty nodes - do they want
                 * all of them?
                 */
                if (NULL != (cptr = strchr(node_from_file->name, ':'))) {
                    /* the colon indicates a specific # are requested */
                    cptr++; /* step past : */
                    num_empty = strtol(cptr, NULL, 10);
                } else {
                    /* want them all - set num_empty to max */
                    num_empty = INT_MAX;
                    want_all_empty = true;
                }
                /* search the list of nodes provided to us and find those
                 * that are empty
                 */
                item1 = opal_list_get_first(nodes);
                while (0 < num_empty && item1 != opal_list_get_end(nodes)) {
                    node_from_list = (orte_node_t*)item1;
                    next = opal_list_get_next(item1);  /* keep our place */
                    if (0 == node_from_list->slots_inuse) {
                        /* check to see if this node is explicitly called
                         * out later - if so, don't use it here
                         */
                        for (item3 = opal_list_get_first(&newnodes);
                             item3 != opal_list_get_end(&newnodes);
                             item3 = opal_list_get_next(item3)) {
                            node3 = (orte_node_t*)item3;
                            if (0 == strcmp(node3->name, node_from_list->name)) {
                                /* match - don't use it */
                                goto skipnode;
                            }
                        }
                        if (remove) {
                            /* remove item from list */
                            opal_list_remove_item(nodes, item1);
                            /* xfer to keep list */
                            opal_list_append(&keep, item1);
                        } else {
                            /* mark as included */
                            node_from_list->mapped = true;
                        }
                        --num_empty;
                    }
                skipnode:
                    item1 = next;
                }
                /* did they get everything they wanted? */
                if (!want_all_empty && 0 < num_empty) {
                    orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                                   true, num_empty);
                    rc = ORTE_ERR_SILENT;
                    goto cleanup;
                }            
            } else if ('n' == node_from_file->name[1] ||
                       'N' == node_from_file->name[1]) {
                /* they want a specific relative node #, so
                 * look it up on global pool
                 */
                nodeidx = strtol(&node_from_file->name[2], NULL, 10);
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                    /* this is an error */
                    orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                                   true, nodeidx, node_from_file->name);
                    rc = ORTE_ERR_SILENT;
                    goto cleanup;
                }
                /* search the list of nodes provided to us and find it */
                for (item1 = opal_list_get_first(nodes);
                     item1 != opal_list_get_end(nodes);
                     item1 = opal_list_get_next(nodes)) {
                    node_from_list = (orte_node_t*)item1;
                    if (0 == strcmp(node_from_list->name, node_from_pool->name)) {
                        if (remove) {
                            /* match - remove item from list */
                            opal_list_remove_item(nodes, item1);
                            /* xfer to keep list */
                            opal_list_append(&keep, item1);
                        } else {
                            /* mark as included */
                            node_from_list->mapped = true;
                        }
                        break;
                    }
                }
            } else {
                /* invalid relative node syntax */
                orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                               true, node_from_file->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
        } else {
            /* we are looking for a specific node on the list
             * search the provided list of nodes to see if this
             * one is found
             */
            found = false;
            for (item1 = opal_list_get_first(nodes);
                 item1 != opal_list_get_end(nodes);
                 item1 = opal_list_get_next(item1)) {
                node_from_list = (orte_node_t*)item1;
                /* since the name in the hostfile might not match
                 * our local name, and yet still be intended to match,
                 * we have to check for local interfaces
                 */
                if (0 == strcmp(node_from_file->name, node_from_list->name) ||
                    (0 == strcmp(node_from_file->name, "localhost") &&
                     0 == strcmp(node_from_list->name, orte_process_info.nodename)) ||
                    (opal_ifislocal(node_from_list->name) &&
                     opal_ifislocal(node_from_file->name))) {
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node_from_file->slots < node_from_list->slots) {
                        node_from_list->slots = node_from_file->slots;
                    }
                    if (remove) {
                        /* remove the node from the list */
                        opal_list_remove_item(nodes, item1);
                        /* xfer it to keep list */
                        opal_list_append(&keep, item1);
                    } else {
                        /* mark as included */
                        node_from_list->mapped = true;
                    }
                    found = true;
                    break;
                }
            }
            /* if the host in the newnode list wasn't found,
             * then that is an error we need to report to the
             * user and abort
             */
            if (!found) {
                orte_show_help("help-hostfile.txt", "hostfile:extra-node-not-found",
                               true, hostfile, node_from_file->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
        }
        /* cleanup the newnode list */
        OBJ_RELEASE(item2);
    }
    
    /* if we still have entries on our hostfile list, then
     * there were requested hosts that were not in our allocation.
     * This is an error - report it to the user and return an error
     */
    if (0 != opal_list_get_size(&newnodes)) {
        orte_show_help("help-hostfile.txt", "not-all-mapped-alloc",
                       true, hostfile);
        while (NULL != (item1 = opal_list_remove_first(&newnodes))) {
            OBJ_RELEASE(item1);
        }
        OBJ_DESTRUCT(&newnodes);
        return ORTE_ERR_SILENT;
    }

    if (!remove) {
        /* all done */
        OBJ_DESTRUCT(&newnodes);
        return ORTE_SUCCESS;
    }

    /* clear the rest of the nodes list */
    while (NULL != (item1 = opal_list_remove_first(nodes))) {
        OBJ_RELEASE(item1);
    }
    
    /* the nodes list has been cleared - rebuild it in order */
    while (NULL != (item1 = opal_list_remove_first(&keep))) {
        opal_list_append(nodes, item1);
    }
    
cleanup:
    OBJ_DESTRUCT(&newnodes);

    return rc;
}
Beispiel #14
0
int
main(int argc, char *argv[])
{
    orcm_alloc_t alloc, *aptr;
    orte_rml_recv_cb_t xfer;
    opal_buffer_t *buf;
    int rc, n;
    orcm_scd_cmd_flag_t command=ORCM_SESSION_REQ_COMMAND;
    orcm_alloc_id_t id;
    struct timeval tv;

    /* initialize, parse command line, and setup frameworks */
    orcm_osub_init(argc, argv);

    /* create an allocation request */
    OBJ_CONSTRUCT(&alloc, orcm_alloc_t);

    alloc.priority = 1;                                // session priority
    alloc.account = orcm_osub_globals.account;         // account to be charged
    alloc.name = orcm_osub_globals.name;               // user-assigned project name
    alloc.gid = orcm_osub_globals.gid;                 // group id to be run under
    alloc.max_nodes = orcm_osub_globals.max_nodes;     // max number of nodes
    alloc.max_pes = orcm_osub_globals.max_pes;         // max number of processing elements
    alloc.min_nodes = orcm_osub_globals.min_nodes;     // min number of nodes required
    alloc.min_pes = orcm_osub_globals.min_pes;         // min number of pe's required
    alloc.exclusive = orcm_osub_globals.exclusive;     // true if nodes to be exclusively allocated (i.e., not shared across sessions)
    alloc.interactive = orcm_osub_globals.interactive; // true if in interactive mode
    alloc.nodes = '\0';                                // regex of nodes to be used
    alloc.parent_name = ORTE_NAME_PRINT(ORTE_PROC_MY_NAME); // my_daemon_name
    alloc.parent_uri = '\0';                           // my_daemon uri address
    /* alloc.constraints = orcm_osub_globals.resources */ ; // list of resource constraints to be applied when selecting hosts
    alloc.hnpname = '\0'; //my hnp name
    alloc.hnpuri = '\0'; //my hnp uri

    alloc.caller_uid = getuid();   // caller uid, not from args
    alloc.caller_gid = getgid();   // caller gid, not from args

    if (NULL == orcm_osub_globals.starttime || 0 == strlen(orcm_osub_globals.starttime)) {
        gettimeofday(&tv,NULL);
        /* desired start time for allocation deafults to now */
        alloc.begin = tv.tv_sec;
    } else {
        /* TODO: eventually parse the string to figure out what user means, for now its now */
        gettimeofday(&tv,NULL);
        alloc.begin = tv.tv_sec;
    }

    if (NULL == orcm_osub_globals.walltime || 0 == strlen(orcm_osub_globals.walltime)) {
        /* desired walltime default to 10 min */
        alloc.walltime = 600;
    } else {
        /* get this in seconds for now, but will be parsed for more complexity later */
        alloc.walltime = (time_t)strtol(orcm_osub_globals.walltime, NULL, 10);                               // max execution time
    }

    /* setup to receive the result */
    OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t);
    xfer.active = true;
    orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                            ORCM_RML_TAG_SCD,
                            ORTE_RML_NON_PERSISTENT,
                            orte_rml_recv_callback, &xfer);

    /* send it to the scheduler */
    buf = OBJ_NEW(opal_buffer_t);
    /* pack the alloc command flag */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &command,1, ORCM_SCD_CMD_T))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    aptr = &alloc;
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &aptr, 1, ORCM_ALLOC))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_SCHEDULER, buf,
                                                      ORCM_RML_TAG_SCD,
                                                      orte_rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        OBJ_DESTRUCT(&xfer);
        return rc;
    }

    /* get our allocated jobid */
    n=1;
    ORTE_WAIT_FOR_COMPLETION(xfer.active);
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer.data, &id, &n, ORCM_ALLOC_ID_T))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&xfer);
        return rc;
    }
    opal_output(0, "RECEIVED ALLOC ID %d", (int)id);

    if (ORTE_SUCCESS != orcm_finalize()) {
        fprintf(stderr, "Failed orcm_finalize\n");
        exit(1);
    }

    return ORTE_SUCCESS;
}
int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file,
                                mca_base_component_t *preferred) 
{
    int priority; 
    int best_priority; 
    opal_list_item_t *item; 
    opal_list_item_t *next_item; 
    char *names, **name_array;
    int num_names;
    mca_base_component_priority_list_item_t *cpli;
    mca_fcoll_base_component_t *component; 
    mca_fcoll_base_component_t *best_component;
    mca_fcoll_base_module_t *module; 
    opal_list_t queried;
    queried_module_t *om;
    opal_list_t *selectable;
    char *str;
    int err = MPI_SUCCESS;
    int i;
    bool was_selectable_constructed = false;

    /* Check and see if a preferred component was provided. If it was
     provided then it should be used (if possible) */
    if (NULL != preferred) {
         
        /* We have a preferred component. Check if it is available
           and if so, whether it wants to run */
         
         str = &(preferred->mca_component_name[0]);
         
         opal_output_verbose(10, mca_fcoll_base_output,
                             "fcoll:base:file_select: Checking preferred component: %s",
                             str);
         
         /* query the component for its priority and get its module 
            structure. This is necessary to proceed */
         
         component = (mca_fcoll_base_component_t *)preferred;
         module = component->fcollm_file_query (file, &priority);
         if (NULL != module && 
             NULL != module->fcoll_module_init) {

             /* this query seems to have returned something legitimate
              * and we can now go ahead and initialize the
              * file with it * but first, the functions which
              * are null need to be filled in */

             /*fill_null_pointers (module);*/
             file->f_fcoll = module;
             file->f_fcoll_component = preferred;

             return module->fcoll_module_init(file);
         } 
            /* His preferred component is present, but is unable to
             * run. This is not a good sign. We should try selecting
             * some other component We let it fall through and select
             * from the list of available components
             */
     } /*end of selection for preferred component */

    /*
     * We fall till here if one of the two things happened:
     * 1. The preferred component was provided but for some reason was
     * not able to be selected
     * 2. No preferred component was provided
     *
     * All we need to do is to go through the list of available
     * components and find the one which has the highest priority and
     * use that for this file
     */ 

    /* Check if anything was requested by means on the name parameters */
    names = NULL;
    mca_base_param_lookup_string (mca_fcoll_base_param, &names);

    if (NULL != names && 0 < strlen(names)) {
        name_array = opal_argv_split (names, ',');
        num_names = opal_argv_count (name_array);

        opal_output_verbose(10, mca_fcoll_base_output,
                            "fcoll:base:file_Select: Checking all available module");

        /* since there are somethings which the mca requested through the 
           if the intersection is NULL, then we barf saying that the requested
           modules are not being available */

        selectable = OBJ_NEW(opal_list_t);
        was_selectable_constructed = true;
        
        /* go through the compoents_available list and check against the names
         * to see whether this can be added or not */

        for (item = opal_list_get_first(&mca_fcoll_base_components_available);
            item != opal_list_get_end(&mca_fcoll_base_components_available);
            item = opal_list_get_next(item)) {
            /* convert the opal_list_item_t returned into the proper type */
            cpli = (mca_base_component_priority_list_item_t *) item;
            component = (mca_fcoll_base_component_t *) cpli->super.cli_component;
            opal_output_verbose(10, mca_fcoll_base_output,
                                "select: initialising %s component %s",
                                component->fcollm_version.mca_type_name,
                                component->fcollm_version.mca_component_name);

            /* check if this name is present in the mca_base_params */
            for (i=0; i < num_names; i++) {
                if (0 == strcmp(name_array[i], component->fcollm_version.mca_component_name)) {
                    /* this is present, and should be added o the selectable list */

                    /* We need to create a seperate object to initialise this list with
                     * since we cannot have the same item in 2 lists */
                    module = component->fcollm_file_query (file, &priority);
                    if (NULL != module && 
                        NULL != module->fcoll_module_init) {

                        file->f_fcoll = module;
                        file->f_fcoll_component = (mca_base_component_t *)component;
                        return module->fcoll_module_init(file);
                    }

                    /*
                    selectable_item = OBJ_NEW (mca_base_component_priority_list_item_t);
                    *selectable_item = *cpli;
                    opal_list_append (selectable, (opal_list_item_t *)selectable_item);
                    break;*/
                }
            }
        }
        
        /* check for a NULL intersection between the available list and the 
         * list which was asked for */

        if (0 == opal_list_get_size(selectable)) {
            was_selectable_constructed = true;
            OBJ_RELEASE (selectable);
            opal_output_verbose (10, mca_fcoll_base_output,
                                 "fcoll:base:file_select: preferred modules were not available");
            return OMPI_ERROR;
        }
    } else { /* if there was no name_array, then we need to simply initialize 
                selectable to mca_fcoll_base_components_available */
        selectable = &mca_fcoll_base_components_available;
    }

    best_component = NULL;
    best_priority = -1;
    OBJ_CONSTRUCT(&queried, opal_list_t);

    for (item = opal_list_get_first(selectable);
         item != opal_list_get_end(selectable);
         item = opal_list_get_next(item)) {
       /*
        * convert the opal_list_item_t returned into the proper type
        */
       cpli = (mca_base_component_priority_list_item_t *) item;
       component = (mca_fcoll_base_component_t *) cpli->super.cli_component;
       opal_output_verbose(10, mca_fcoll_base_output,
                           "select: initialising %s component %s",
                           component->fcollm_version.mca_type_name,
                           component->fcollm_version.mca_component_name);

       /*
        * we can call the query function only if there is a function :-)
        */
       if (NULL == component->fcollm_file_query) {
          opal_output_verbose(10, mca_fcoll_base_output,
                             "select: no query, ignoring the component");
       } else {
           /*
            * call the query function and see what it returns
            */ 
           module = component->fcollm_file_query (file, &priority);

           if (NULL == module ||
               NULL == module->fcoll_module_init) {
               /*
                * query did not return any action which can be used
                */ 
               opal_output_verbose(10, mca_fcoll_base_output,
                                  "select: query returned failure");
           } else {
               opal_output_verbose(10, mca_fcoll_base_output,
                                  "select: query returned priority %d",
                                  priority);
               /* 
                * is this the best component we have found till now?
                */
               if (priority > best_priority) {
                   best_priority = priority;
                   best_component = component;
               }

               om = OBJ_NEW(queried_module_t);
               /*
                * check if we have run out of space
                */
               if (NULL == om) {
                   OBJ_DESTRUCT(&queried);
                   return OMPI_ERR_OUT_OF_RESOURCE;
               }
               om->om_component = component;
               om->om_module = module; 
               opal_list_append(&queried, (opal_list_item_t *)om); 
           } /* end else of if (NULL == module) */
       } /* end else of if (NULL == component->fcollm_init) */
    } /* end for ... end of traversal */

    /* We have to remove empty out the selectable list if the selectable 
     * list was constructed as a duplicate and not as a pointer to the
     * mca_base_components_available list. So, check and destroy */

    if (was_selectable_constructed) {

        /* remove all the items first */
        for (item = opal_list_get_first(&mca_fcoll_base_components_available);
             item != opal_list_get_end(&mca_fcoll_base_components_available);
             item = next_item) {
             next_item = opal_list_get_next(item);
             OBJ_RELEASE (item);
        }
                
        /* release the list itself */
        OBJ_RELEASE (selectable);
        was_selectable_constructed = false;
    }

    /*
     * Now we have alist of components which successfully returned
     * their module struct.  One of these components has the best
     * priority. The rest have to be comm_unqueried to counter the
     * effects of file_query'ing them. Finalize happens only on
     * components which should are initialized.
     */
    if (NULL == best_component) {
       /*
        * This typically means that there was no component which was
        * able to run properly this time. So, we need to abort
        */
        OBJ_DESTRUCT(&queried);
        return OMPI_ERROR;
    }

    /*
     * We now have a list of components which have successfully
     * returned their priorities from the query. We now have to
     * unquery() those components which have not been selected and
     * init() the component which was selected
     */ 
    for (item = opal_list_remove_first(&queried);
         NULL != item;
         item = opal_list_remove_first(&queried)) {
        om = (queried_module_t *) item;
        if (om->om_component == best_component) {
           /*
            * this is the chosen component, we have to initialise the
            * module of this component.
            *
            * ANJU: a component might not have all the functions
            * defined.  Whereever a function pointer is null in the
            * module structure we need to fill it in with the base
            * structure function pointers. This is yet to be done
            */ 

            /*
             * We don return here coz we still need to go through and
             * elease the other objects
             */

            /*fill_null_pointers (om->om_module);*/
            file->f_fcoll = om->om_module;
            err = om->om_module->fcoll_module_init(file);
            file->f_fcoll_component = (mca_base_component_t *)best_component;
            /*
            printf ("SELECTED: %s\n", best_component->fcollm_version.mca_component_name);
            */
         } else {
            /*
             * this is not the "choosen one", finalize
             */
             if (NULL != om->om_component->fcollm_file_unquery) {
                /* unquery the component only if they have some clean
                 * up job to do. Components which are queried but do
                 * not actually do anything typically do not have a
                 * unquery. Hence this check is necessary
                 */
                 (void) om->om_component->fcollm_file_unquery(file);
                 opal_output_verbose(10, mca_fcoll_base_output,
                                     "select: component %s is not selected",
                                     om->om_component->fcollm_version.mca_component_name);
               } /* end if */
          } /* if not best component */
          OBJ_RELEASE(om);
    } /* traversing through the entire list */
    
    opal_output_verbose(10, mca_fcoll_base_output,
                       "select: component %s selected",
                        best_component->fcollm_version.mca_component_name);

    OBJ_DESTRUCT(&queried);

    return err;
}
Beispiel #16
0
int orte_ess_base_app_setup(bool db_restrict_local)
{
    int ret;
    char *error = NULL;
    opal_value_t kv;

    /*
     * stdout/stderr buffering
     * If the user requested to override the default setting then do
     * as they wish.
     */
    if( orte_ess_base_std_buffering > -1 ) {
        if( 0 == orte_ess_base_std_buffering ) {
            setvbuf(stdout, NULL, _IONBF, 0);
            setvbuf(stderr, NULL, _IONBF, 0);
        }
        else if( 1 == orte_ess_base_std_buffering ) {
            setvbuf(stdout, NULL, _IOLBF, 0);
            setvbuf(stderr, NULL, _IOLBF, 0);
        }
        else if( 2 == orte_ess_base_std_buffering ) {
            setvbuf(stdout, NULL, _IOFBF, 0);
            setvbuf(stderr, NULL, _IOFBF, 0);
        }
    }

    /* if I am an MPI app, we will let the MPI layer define and
     * control the opal_proc_t structure. Otherwise, we need to
     * do so here */
    if (ORTE_PROC_NON_MPI) {
        orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
        orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
        orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
        orte_process_info.super.proc_arch = opal_local_arch;
        opal_proc_local_set(&orte_process_info.super);
    }

    /* get an async event base - we use the opal_async one so
     * we don't startup extra threads if not needed */
    orte_event_base = opal_progress_thread_init(NULL);
    progress_thread_running = true;
    /* open and setup the state machine */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_state_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_state_base_select";
        goto error;
    }

    /* open the errmgr */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_errmgr_base_open";
        goto error;
    }

    /* setup my session directory */
    if (orte_create_session_dirs) {
        OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
                             "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
                             orte_process_info.nodename));
        if (ORTE_SUCCESS != (ret = orte_session_dir(true,
                                                    orte_process_info.tmpdir_base,
                                                    orte_process_info.nodename, NULL,
                                                    ORTE_PROC_MY_NAME))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_session_dir";
            goto error;
        }
        /* Once the session directory location has been established, set
           the opal_output env file location to be in the
           proc-specific session directory. */
        opal_output_set_output_file_info(orte_process_info.proc_session_dir,
                                         "output-", NULL, NULL);
        /* store the session directory location */
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_PMIX_NSDIR);
        kv.type = OPAL_STRING;
        kv.data.string = strdup(orte_process_info.job_session_dir);
        if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) {
            ORTE_ERROR_LOG(ret);
            OBJ_DESTRUCT(&kv);
            error = "opal pmix put job sessiondir";
            goto error;
        }
        OBJ_DESTRUCT(&kv);
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_PMIX_PROCDIR);
        kv.type = OPAL_STRING;
        kv.data.string = strdup(orte_process_info.proc_session_dir);
        if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) {
            ORTE_ERROR_LOG(ret);
            OBJ_DESTRUCT(&kv);
            error = "opal pmix put proc sessiondir";
            goto error;
        }
        OBJ_DESTRUCT(&kv);
    }
    /* Setup the communication infrastructure */
    /*
     * OOB Layer
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_oob_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_oob_base_select";
        goto error;
    }
    /* Runtime Messaging Layer */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_base_select";
        goto error;
    }
    /* Messaging QoS Layer */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_qos_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_qos_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_qos_base_select";
        goto error;
    }
    /* setup the errmgr */
    if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_errmgr_base_select";
        goto error;
    }
    /* Routed system */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_routed_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_routed_base_select";
        goto error;
    }
    /*
     * Group communications
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_grpcomm_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_grpcomm_base_select";
        goto error;
    }
    /* enable communication via the rml */
    if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml.enable_comm";
        goto error;
    }
    /* setup the routed info  */
    if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_routed.init_routes";
        goto error;
    }
#if OPAL_ENABLE_FT_CR == 1
    /*
     * Setup the SnapC
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_snapc_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_sstore_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_snapc_base_select";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_sstore_base_select";
        goto error;
    }
    /* apps need the OPAL CR stuff */
    opal_cr_set_enabled(true);
#else
    opal_cr_set_enabled(false);
#endif
    /* Initalize the CR setup
     * Note: Always do this, even in non-FT builds.
     * If we don't some user level tools may hang.
     */
    if (ORTE_SUCCESS != (ret = orte_cr_init())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_cr_init";
        goto error;
    }
    /* open the distributed file system */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_dfs_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_dfs_base_select";
        goto error;
    }
    return ORTE_SUCCESS;
 error:
    if (!progress_thread_running) {
        /* can't send the help message, so ensure it
         * comes out locally
         */
        orte_show_help_finalize();
    }
    orte_show_help("help-orte-runtime.txt",
                   "orte_init:startup:internal-failure",
                   true, error, ORTE_ERROR_NAME(ret), ret);
    return ret;
}
Beispiel #17
0
/*
 * Clean up the op resources
 */
int ompi_op_finalize(void)
{
    /* clean up the intrinsic ops */
    OBJ_DESTRUCT(&ompi_mpi_op_replace);
    OBJ_DESTRUCT(&ompi_mpi_op_minloc);
    OBJ_DESTRUCT(&ompi_mpi_op_maxloc);
    OBJ_DESTRUCT(&ompi_mpi_op_bxor);
    OBJ_DESTRUCT(&ompi_mpi_op_lxor);
    OBJ_DESTRUCT(&ompi_mpi_op_bor);
    OBJ_DESTRUCT(&ompi_mpi_op_lor);
    OBJ_DESTRUCT(&ompi_mpi_op_band);
    OBJ_DESTRUCT(&ompi_mpi_op_land);
    OBJ_DESTRUCT(&ompi_mpi_op_prod);
    OBJ_DESTRUCT(&ompi_mpi_op_sum);
    OBJ_DESTRUCT(&ompi_mpi_op_min);
    OBJ_DESTRUCT(&ompi_mpi_op_max);
    OBJ_DESTRUCT(&ompi_mpi_op_null);

    /* Remove op F2C table */

    OBJ_RELEASE(ompi_op_f_to_c_table);

    /* All done */

    return OMPI_SUCCESS;
}
Beispiel #18
0
static int orte_rds_hostfile_query(orte_jobid_t job)
{
    opal_list_t existing;
    opal_list_t updates, rds_updates;
    opal_list_item_t *item;
    orte_rds_cell_desc_t *rds_item;
    orte_rds_cell_attr_t *new_attr;
    orte_ras_node_t *ras_item;
    int rc;

    if (orte_rds_hostfile_queried) {
        /* if we have already been queried, then
         * our info is on the registry, so just
         * return. Note that this restriction
         * may eventually be lifted - ideally, 
         * we might check to see if this is a
         * new file name and go ahead with the
         * query if so.
         */
        return ORTE_SUCCESS;
    }
    orte_rds_hostfile_queried = true;
    
    OBJ_CONSTRUCT(&existing, opal_list_t);
    OBJ_CONSTRUCT(&updates, opal_list_t);
    OBJ_CONSTRUCT(&rds_updates, opal_list_t);
    rc = orte_ras_base_node_query(&existing);
    if(ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    rc = mca_base_param_find("rds", "hostfile", "path");
    mca_base_param_lookup_string(rc, &mca_rds_hostfile_component.path);

    rc = orte_rds_hostfile_parse(mca_rds_hostfile_component.path, &existing, &updates);
    if (ORTE_ERR_NOT_FOUND == rc) {
        if(mca_rds_hostfile_component.default_hostfile) {
            rc = ORTE_SUCCESS;
        } else {
            opal_show_help("help-rds-hostfile.txt", "rds:no-hostfile",
                           true,
                           mca_rds_hostfile_component.path);
        }
        goto cleanup;
    } else if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    if ( !opal_list_is_empty(&updates) ) {

        /* Convert RAS update list to RDS update list */
        for ( ras_item  = (orte_ras_node_t*)opal_list_get_first(&updates);
              ras_item != (orte_ras_node_t*)opal_list_get_end(&updates);
              ras_item  = (orte_ras_node_t*)opal_list_get_next(ras_item)) {

            rds_item = OBJ_NEW(orte_rds_cell_desc_t);
            if (NULL == rds_item) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            rds_item->site  = strdup("Hostfile");
            rds_item->name  = strdup(ras_item->node_name);
            if (need_cellid) {
#if 0 /* JJH Repair when cellid's are fixed */
                /* Create a new cellid for this hostfile */
                rc = orte_ns.create_cellid(&local_cellid, rds_item->site, rds_item->name);
                if (ORTE_SUCCESS != rc) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
#endif
                local_cellid = 0;
                need_cellid = false;
            }

            rds_item->cellid      = local_cellid;
            ras_item->node_cellid = local_cellid;

            new_attr = OBJ_NEW(orte_rds_cell_attr_t);
            if (NULL == new_attr) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            new_attr->keyval.key          = strdup(ORTE_RDS_NAME);
            new_attr->keyval.value = OBJ_NEW(orte_data_value_t);
            if (NULL == new_attr->keyval.value) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            new_attr->keyval.value->type   = ORTE_STRING;
            new_attr->keyval.value->data   = strdup(ras_item->node_name);
            opal_list_append(&(rds_item->attributes), &new_attr->super);

            new_attr = OBJ_NEW(orte_rds_cell_attr_t);
            if (NULL == new_attr) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            new_attr->keyval.key          = strdup(ORTE_CELLID_KEY);
            new_attr->keyval.value = OBJ_NEW(orte_data_value_t);
            if (NULL == new_attr->keyval.value) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            new_attr->keyval.value->type   = ORTE_CELLID;
            if (ORTE_SUCCESS != (rc = orte_dss.copy(&(new_attr->keyval.value->data), &(rds_item->cellid), ORTE_CELLID))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            opal_list_append(&(rds_item->attributes), &new_attr->super);

            opal_list_append(&rds_updates, &rds_item->super);
        }

        /* Insert the new node into the RDS */
        rc = orte_rds.store_resource(&rds_updates);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }

        /* Then the RAS, since we can assume that any
         * resources listed in the hostfile have been
         * already allocated for our use.
         */
        rc = orte_ras_base_node_insert(&updates);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
        
        /* and now, indicate that ORTE should override any oversubscribed conditions
         * based on local hardware limits since the user (a) might not have
         * provided us any info on the #slots for a node, and (b) the user
         * might have been wrong! If we don't check the number of local physical
         * processors, then we could be too aggressive on our sched_yield setting
         * and cause performance problems.
         */
        rc = orte_ras_base_set_oversubscribe_override(job);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
    }

cleanup:
    if (NULL != mca_rds_hostfile_component.path) {
        free(mca_rds_hostfile_component.path);
        mca_rds_hostfile_component.path = NULL;
    }

    while(NULL != (item = opal_list_remove_first(&existing))) {
        OBJ_RELEASE(item);
    }

    while(NULL != (item = opal_list_remove_first(&updates))) {
        OBJ_RELEASE(item);
    }

    while (NULL != (rds_item = (orte_rds_cell_desc_t*)opal_list_remove_first(&rds_updates))) {
        while (NULL != (new_attr = (orte_rds_cell_attr_t*)opal_list_remove_first(&(rds_item->attributes)))) {
            OBJ_RELEASE(new_attr);
        }
        OBJ_RELEASE(rds_item);
    }

    OBJ_DESTRUCT(&existing);
    OBJ_DESTRUCT(&updates);
    OBJ_DESTRUCT(&rds_updates);

    return rc;
}
Beispiel #19
0
static void ompi_osc_rdma_replyreq_destruct(ompi_osc_rdma_replyreq_t *replyreq)
{
    OBJ_DESTRUCT(&(replyreq->rep_target_convertor));
}
static int
portals4_close(void)
{
    int ret;

    OBJ_DESTRUCT(&mca_coll_portals4_component.requests);

    if (!PtlHandleIsEqual(mca_coll_portals4_component.zero_md_h, PTL_INVALID_HANDLE)) {
        ret = PtlMDRelease(mca_coll_portals4_component.zero_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlMDRelease failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    mca_coll_portals4_component.zero_md_h = PTL_INVALID_HANDLE;

    if (!PtlHandleIsEqual(mca_coll_portals4_component.data_md_h, PTL_INVALID_HANDLE)) {
        ret = PtlMDRelease(mca_coll_portals4_component.data_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlMDRelease failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    mca_coll_portals4_component.data_md_h = PTL_INVALID_HANDLE;

    if (!PtlHandleIsEqual(mca_coll_portals4_component.finish_me_h, PTL_INVALID_HANDLE)) {
        ret = PtlMEUnlink(mca_coll_portals4_component.finish_me_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlMEUnlink failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    if (!PtlHandleIsEqual(mca_coll_portals4_component.unex_me_h, PTL_INVALID_HANDLE)) {
        ret = PtlMEUnlink(mca_coll_portals4_component.unex_me_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlMEUnlink failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    if (mca_coll_portals4_component.finish_pt_idx >= 0) {
        ret = PtlPTFree(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.finish_pt_idx);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlPTFree failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    if (mca_coll_portals4_component.pt_idx >= 0) {
        ret = PtlPTFree(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.pt_idx);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlPTFree failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    if (!PtlHandleIsEqual(mca_coll_portals4_component.eq_h, PTL_INVALID_HANDLE)) {
        ret = PtlEQFree(mca_coll_portals4_component.eq_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlEQFree failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    if (!PtlHandleIsEqual(mca_coll_portals4_component.ni_h, PTL_INVALID_HANDLE)) {
        ret = PtlNIFini(mca_coll_portals4_component.ni_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlNIFini failed: %d\n",
                    __FILE__, __LINE__, ret);
        }

        PtlFini();
    }

    opal_progress_unregister(portals4_progress);

    return OMPI_SUCCESS;
}
Beispiel #21
0
int main(int argc, char *argv[])
{
    int ret = 0;
    bool want_help = false;
    bool cmd_error = false;
    bool acted = false;
    bool want_all = false;
    char **app_env = NULL, **global_env = NULL;
    int i, len;
    char *str;
    
    /* Initialize the argv parsing handle */
    if (OMPI_SUCCESS != opal_init_util(&argc, &argv)) {
        orte_show_help("help-ompi_info.txt", "lib-call-fail", true, 
                       "opal_init_util", __FILE__, __LINE__, NULL);
        exit(ret);
    }
    
    ompi_info_cmd_line = OBJ_NEW(opal_cmd_line_t);
    if (NULL == ompi_info_cmd_line) {
        ret = errno;
        orte_show_help("help-ompi_info.txt", "lib-call-fail", true, 
                       "opal_cmd_line_create", __FILE__, __LINE__, NULL);
        opal_finalize_util();
        exit(ret);
    }
    
    opal_cmd_line_make_opt3(ompi_info_cmd_line, 'v', NULL, "version", 2, 
                            "Show version of Open MPI or a component.  The first parameter can be the keywords \"ompi\" or \"all\", a framework name (indicating all components in a framework), or a framework:component string (indicating a specific component).  The second parameter can be one of: full, major, minor, release, greek, svn.");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, '\0', NULL, "param", 2, 
                            "Show MCA parameters.  The first parameter is the framework (or the keyword \"all\"); the second parameter is the specific component name (or the keyword \"all\").");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, '\0', NULL, "internal", 0, 
                            "Show internal MCA parameters (not meant to be modified by users)");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, '\0', NULL, "path", 1, 
                            "Show paths that Open MPI was configured with.  Accepts the following parameters: prefix, bindir, libdir, incdir, mandir, pkglibdir, sysconfdir");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, '\0', NULL, "arch", 0, 
                            "Show architecture Open MPI was compiled on");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, 'c', NULL, "config", 0, 
                            "Show configuration options");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, 'h', NULL, "help", 0, 
                            "Show this help message");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, '\0', NULL, "ompi_info_pretty", 0, 
                            "When used in conjunction with other parameters, the output is displayed in 'ompi_info_prettyprint' format (default)");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, '\0', NULL, "parsable", 0, 
                            "When used in conjunction with other parameters, the output is displayed in a machine-parsable format");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, '\0', NULL, "parseable", 0, 
                            "Synonym for --parsable");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, '\0', NULL, "hostname", 0, 
                            "Show the hostname that Open MPI was configured "
                            "and built on");
    opal_cmd_line_make_opt3(ompi_info_cmd_line, 'a', NULL, "all", 0, 
                            "Show all configuration options and MCA parameters");
    
    /* Call some useless functions in order to guarantee to link in some
     * global variables.  Only check the return value so that the
     * compiler doesn't optimize out the useless function.
     */
    
    if (OMPI_SUCCESS != ompi_comm_link_function()) {
        /* Stop .. or I'll say stop again! */
        ++ret;
    } else {
        --ret;
    }
    
    /* set our threading level */
    opal_set_using_threads(false);
    
    /* Get MCA parameters, if any */
    
    if( OMPI_SUCCESS != mca_base_open() ) {
        orte_show_help("help-ompi_info.txt", "lib-call-fail", true, "mca_base_open", __FILE__, __LINE__ );
        OBJ_RELEASE(ompi_info_cmd_line);
        opal_finalize_util();
        exit(1);
    }
    mca_base_cmd_line_setup(ompi_info_cmd_line);
    
    /* Do the parsing */
    
    if (OMPI_SUCCESS != opal_cmd_line_parse(ompi_info_cmd_line, false, argc, argv)) {
        cmd_error = true;
    }
    if (!cmd_error && 
        (opal_cmd_line_is_taken(ompi_info_cmd_line, "help") || 
         opal_cmd_line_is_taken(ompi_info_cmd_line, "h"))) {
        want_help = true;
    }
    if (cmd_error || want_help) {
        char *usage = opal_cmd_line_get_usage_msg(ompi_info_cmd_line);
        orte_show_help("help-ompi_info.txt", "usage", true, usage);
        free(usage);
        mca_base_close();
        OBJ_RELEASE(ompi_info_cmd_line);
        opal_finalize_util();
        exit(cmd_error ? 1 : 0);
    }
    
    mca_base_cmd_line_process_args(ompi_info_cmd_line, &app_env, &global_env);
    
    /* putenv() all the stuff that we got back from env (in case the
     * user specified some --mca params on the command line).  This
     * creates a memory leak, but that's unfortunately how putenv()
     * works.  :-(
     */
    
    len = opal_argv_count(app_env);
    for (i = 0; i < len; ++i) {
        putenv(app_env[i]);
    }
    len = opal_argv_count(global_env);
    for (i = 0; i < len; ++i) {
        putenv(global_env[i]);
    }
    
    /* setup the mca_types array */
    OBJ_CONSTRUCT(&mca_types, opal_pointer_array_t);
    opal_pointer_array_init(&mca_types, 256, INT_MAX, 128);
    
    opal_pointer_array_add(&mca_types, "mca");
    opal_pointer_array_add(&mca_types, "mpi");
    opal_pointer_array_add(&mca_types, "orte");
    opal_pointer_array_add(&mca_types, "opal");
    
    opal_pointer_array_add(&mca_types, "filter");
    opal_pointer_array_add(&mca_types, "backtrace");
    opal_pointer_array_add(&mca_types, "memchecker");
    opal_pointer_array_add(&mca_types, "memory");
    opal_pointer_array_add(&mca_types, "paffinity");
    opal_pointer_array_add(&mca_types, "carto");
    opal_pointer_array_add(&mca_types, "shmem");
    opal_pointer_array_add(&mca_types, "maffinity");
    opal_pointer_array_add(&mca_types, "timer");
    opal_pointer_array_add(&mca_types, "installdirs");
    opal_pointer_array_add(&mca_types, "sysinfo");
    opal_pointer_array_add(&mca_types, "hwloc");
#if OPAL_ENABLE_FT_CR == 1
    opal_pointer_array_add(&mca_types, "crs");
#endif
    opal_pointer_array_add(&mca_types, "dpm");
    opal_pointer_array_add(&mca_types, "pubsub");
    opal_pointer_array_add(&mca_types, "allocator");
    opal_pointer_array_add(&mca_types, "coll");
    opal_pointer_array_add(&mca_types, "io");
    opal_pointer_array_add(&mca_types, "mpool");
    opal_pointer_array_add(&mca_types, "pml");
    opal_pointer_array_add(&mca_types, "bml");
    opal_pointer_array_add(&mca_types, "rcache");
    opal_pointer_array_add(&mca_types, "btl");
    opal_pointer_array_add(&mca_types, "mtl");
    opal_pointer_array_add(&mca_types, "topo");
    opal_pointer_array_add(&mca_types, "osc");
    opal_pointer_array_add(&mca_types, "op");
    opal_pointer_array_add(&mca_types, "common");
#if OPAL_ENABLE_FT_CR == 1
    opal_pointer_array_add(&mca_types, "crcp");
#endif
    
#if !ORTE_DISABLE_FULL_SUPPORT
    opal_pointer_array_add(&mca_types, "iof");
    opal_pointer_array_add(&mca_types, "oob");
    opal_pointer_array_add(&mca_types, "odls");
    opal_pointer_array_add(&mca_types, "ras");
    opal_pointer_array_add(&mca_types, "rmaps");
    opal_pointer_array_add(&mca_types, "rml");
    opal_pointer_array_add(&mca_types, "routed");
    opal_pointer_array_add(&mca_types, "plm");
#if OPAL_ENABLE_FT_CR == 1
    opal_pointer_array_add(&mca_types, "snapc");
#endif
    opal_pointer_array_add(&mca_types, "filem");
#endif
    /* these are always included */
    opal_pointer_array_add(&mca_types, "errmgr");
    opal_pointer_array_add(&mca_types, "ess");
    opal_pointer_array_add(&mca_types, "grpcomm");
    opal_pointer_array_add(&mca_types, "notifier");
    
    /* Execute the desired action(s) */
    
    if (opal_cmd_line_is_taken(ompi_info_cmd_line, "ompi_info_pretty")) {
        ompi_info_pretty = true;
    } else if (opal_cmd_line_is_taken(ompi_info_cmd_line, "parsable") || opal_cmd_line_is_taken(ompi_info_cmd_line, "parseable")) {
        ompi_info_pretty = false;
    }
    
    want_all = opal_cmd_line_is_taken(ompi_info_cmd_line, "all");
    if (want_all || opal_cmd_line_is_taken(ompi_info_cmd_line, "version")) {
        ompi_info_do_version(want_all, ompi_info_cmd_line);
        acted = true;
    }
    if (want_all || opal_cmd_line_is_taken(ompi_info_cmd_line, "path")) {
        ompi_info_do_path(want_all, ompi_info_cmd_line);
        acted = true;
    }
    if (want_all || opal_cmd_line_is_taken(ompi_info_cmd_line, "arch")) {
        ompi_info_do_arch();
        acted = true;
    }
    if (want_all || opal_cmd_line_is_taken(ompi_info_cmd_line, "hostname")) {
        ompi_info_do_hostname();
        acted = true;
    }
    if (want_all || opal_cmd_line_is_taken(ompi_info_cmd_line, "config")) {
        ompi_info_do_config(true);
        acted = true;
    }
    if (want_all || opal_cmd_line_is_taken(ompi_info_cmd_line, "param")) {
        ompi_info_do_params(want_all, opal_cmd_line_is_taken(ompi_info_cmd_line, "internal"));
        acted = true;
    }
    
    /* If no command line args are specified, show default set */
    
    if (!acted) {
        ompi_info_show_ompi_version(ompi_info_ver_full);
        ompi_info_show_path(ompi_info_path_prefix, opal_install_dirs.prefix);
        ompi_info_do_arch();
        ompi_info_do_hostname();
        ompi_info_do_config(false);
        ompi_info_open_components();
        for (i = 0; i < mca_types.size; ++i) {
            if (NULL == (str = (char*)opal_pointer_array_get_item(&mca_types, i))) {
                continue;
            }
            if (0 != strcmp("mpi", str)) {
                ompi_info_show_component_version(str, ompi_info_component_all, 
                                       ompi_info_ver_full, ompi_info_type_all);
            }
        }
    }
    
    /* All done */
    
    if (NULL != app_env) {
        opal_argv_free(app_env);
    }
    if (NULL != global_env) {
        opal_argv_free(global_env);
    }
    ompi_info_close_components();
    OBJ_RELEASE(ompi_info_cmd_line);
    OBJ_DESTRUCT(&mca_types);
    mca_base_close();
    
    opal_finalize_util();
    
    return 0;
}
static int mca_oob_ud_component_close (void)
{
    OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:component_close entering",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    OBJ_DESTRUCT(&mca_oob_ud_component.ud_devices);
    OBJ_DESTRUCT(&mca_oob_ud_component.ud_active_sends);
    OBJ_DESTRUCT(&mca_oob_ud_component.ud_pending_recvs);

    OBJ_DESTRUCT(&mca_oob_ud_component.ud_active_recvs);
    OBJ_DESTRUCT(&mca_oob_ud_component.ud_event_queued_reqs);
    OBJ_DESTRUCT(&mca_oob_ud_component.ud_unexpected_recvs);

    OBJ_DESTRUCT(&mca_oob_ud_component.ud_lock);
    OBJ_DESTRUCT(&mca_oob_ud_component.ud_match_lock);

    OBJ_DESTRUCT(&mca_oob_ud_component.ud_peers);

    OBJ_DESTRUCT(&mca_oob_ud_component.ud_completed);
    OBJ_DESTRUCT(&mca_oob_ud_component.ud_event_processing_msgs);

    return ORTE_SUCCESS;
}
Beispiel #23
0
static int s1_fence(opal_list_t *procs, int collect_data)
{
    int rc;
    int32_t i;
    opal_value_t *kp, kvn;
    opal_hwloc_locality_t locality;
    opal_process_name_t s1_pname;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s1 called fence",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* use the PMI barrier function */
    if (PMI_SUCCESS != (rc = PMI_Barrier())) {
        OPAL_PMI_ERROR(rc, "PMI_Barrier");
        return OPAL_ERROR;
    }

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:s1 barrier complete",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* get the modex data from each local process and set the
     * localities to avoid having the MPI layer fetch data
     * for every process in the job */
    s1_pname.jobid = OPAL_PROC_MY_NAME.jobid;
    if (!got_modex_data) {
        got_modex_data = true;
        /* we only need to set locality for each local rank as "not found"
         * equates to "non-local" */
        for (i=0; i < nlranks; i++) {
            s1_pname.vpid = lranks[i];
            rc = opal_pmix_base_cache_keys_locally(&s1_pname, OPAL_PMIX_CPUSET,
                                                   &kp, pmix_kvs_name, pmix_vallen_max, kvs_get);
            if (OPAL_SUCCESS != rc) {
                OPAL_ERROR_LOG(rc);
                return rc;
            }
            if (NULL == kp || NULL == kp->data.string) {
                /* if we share a node, but we don't know anything more, then
                 * mark us as on the node as this is all we know
                 */
                locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
            } else {
                /* determine relative location on our node */
                locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
                                                                 opal_process_info.cpuset,
                                                                 kp->data.string);
            }
            if (NULL != kp) {
                OBJ_RELEASE(kp);
            }
            OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
                                 "%s pmix:s1 proc %s locality %s",
                                 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                 OPAL_NAME_PRINT(s1_pname),
                                 opal_hwloc_base_print_locality(locality)));

            OBJ_CONSTRUCT(&kvn, opal_value_t);
            kvn.key = strdup(OPAL_PMIX_LOCALITY);
            kvn.type = OPAL_UINT16;
            kvn.data.uint16 = locality;
            opal_pmix_base_store(&s1_pname, &kvn);
            OBJ_DESTRUCT(&kvn);
        }
    }

    return OPAL_SUCCESS;
}
static void mca_pml_csum_comm_proc_destruct(mca_pml_csum_comm_proc_t* proc)
{
    OBJ_DESTRUCT(&proc->frags_cant_match);
    OBJ_DESTRUCT(&proc->specific_receives);
    OBJ_DESTRUCT(&proc->unexpected_frags);
}
int orte_routed_base_register_sync(bool setup)
{
    opal_buffer_t buffer;
    int rc;
    orte_daemon_cmd_flag_t command=ORTE_DAEMON_SYNC_BY_PROC;
    char *rml_uri;
    
    OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
                         "%s registering sync to daemon %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(ORTE_PROC_MY_DAEMON)));
    
    /* we need to get the oob to establish
     * the connection - the oob will leave the connection "alive"
     * thereafter so we can communicate readily
     */
    
    OBJ_CONSTRUCT(&buffer, opal_buffer_t);
    
    /* if we are setting up, tell the daemon to send back a nidmap */
    if (setup) {
        command = ORTE_DAEMON_SYNC_WANT_NIDMAP;
    }


    /* tell the daemon to sync */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, &command, 1, ORTE_DAEMON_CMD))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&buffer);
        return rc;
    }
    
    /* add our contact info to the buffer so the daemon can explicitly
     * store it
     */
    rml_uri = orte_rml.get_contact_info();
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buffer, &rml_uri, 1, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&buffer);
        free(rml_uri);
        return rc;
    }
    if (NULL != rml_uri) free(rml_uri);
    
    /* send the sync command to our daemon */
    if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_DAEMON, &buffer, ORTE_RML_TAG_DAEMON, 0))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&buffer);
        return rc;
    }
    OBJ_DESTRUCT(&buffer);
    
    /* get the ack - need this to ensure that the sync communication
     * gets serviced by the event library on the orted prior to the
     * process exiting
     */
    OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
                         "%s registering sync waiting for ack",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    sync_recvd = false;
    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SYNC,
                                 ORTE_RML_NON_PERSISTENT, report_sync, NULL);
    if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    
    ORTE_PROGRESSED_WAIT(sync_recvd, 0, 1);
    
    OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
                         "%s registering sync ack recvd",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    return ORTE_SUCCESS;
}
Beispiel #26
0
int main(int argc, char *argv[])
{
    int ret = 0;
    int fd;
    opal_cmd_line_t *cmd_line = NULL;
    char *log_path = NULL;
    char log_file[PATH_MAX];
    char *jobidstring;
    orte_gpr_value_t *value;
    char *segment;
    int i;
    orte_buffer_t answer;
    char *umask_str;

    /* Allow the PLS starters to pass us a umask to use, if required.
       Most starters by default can do something sane with the umask,
       but some (like TM) do not pass on the umask but instead inherit
       it form the root level process starter.  This has to happen
       before opal_init and everything else so that the couple of
       places that stash a umask end up with the correct value.  Only
       do it here (and not in orte_daemon) mainly to make it clear
       that this should only happen when starting an orted for the
       first time.  All startes I'm aware of that don't require an
       orted are smart enough to pass on a reasonable umask, so they
       wouldn't need this functionality anyway. */
    umask_str = getenv("ORTE_DAEMON_UMASK_VALUE");
    if (NULL != umask_str) {
        char *endptr;
        long mask = strtol(umask_str, &endptr, 8);
        if ((! (0 == mask && (EINVAL == errno || ERANGE == errno))) &&
            (*endptr == '\0')) {
            umask(mask);
        }
    }

    /* initialize the globals */
    memset(&orted_globals, 0, sizeof(orted_globals_t));

    /* Ensure that enough of OPAL is setup for us to be able to run */
    if (OPAL_SUCCESS != opal_init_util()) {
        fprintf(stderr, "OPAL failed to initialize -- orted aborting\n");
        exit(1);
    }

    /* save the environment for use when launching application processes */
    orted_globals.saved_environ = opal_argv_copy(environ);

    /* setup mca param system */
    mca_base_param_init();
    
    /* setup to check common command line options that just report and die */
    cmd_line = OBJ_NEW(opal_cmd_line_t);
    opal_cmd_line_create(cmd_line, orte_cmd_line_opts);
    if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, false,
                                                   argc, argv))) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        opal_show_help("help-orted.txt", "orted:usage", false,
                       argv[0], args);
        free(args);
        return ret;
    }

    /* check for help request */
    if (orted_globals.help) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        opal_show_help("help-orted.txt", "orted:usage", false,
                       argv[0], args);
        free(args);
        return 1;
    }

    /* see if we were directed to separate from current session */
    if (orted_globals.set_sid) {
        setsid();
    }
    
    /* see if they want us to spin until they can connect a debugger to us */
    i=0;
    while (orted_globals.spin) {
        i++;
        if (1000 < i) i=0;        
    }
    
    /* Okay, now on to serious business! */
    
    /* Ensure the process info structure in instantiated and initialized
     * and set the daemon flag to true
     */
    orte_process_info.daemon = true;

    /*
     * If the daemon was given a name on the command line, need to set the
     * proper indicators in the environment so the name discovery service
     * can find it
     */
    if (orted_globals.name) {
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds",
                                              "env", true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
                           "OMPI_MCA_ns_nds", "env", ret);
            return ret;
        }
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds_name",
                                  orted_globals.name, true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
                           "OMPI_MCA_ns_nds_name", orted_globals.name, ret);
            return ret;
        }
        /* the following values are meaningless to the daemon, but may have
         * been passed in anyway. we set them here because the nds_env component
         * requires that they be set
         */
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds_vpid_start",
                                  orted_globals.vpid_start, true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
                           "OMPI_MCA_ns_nds_vpid_start", orted_globals.vpid_start, ret);
            return ret;
        }
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds_num_procs",
                                  orted_globals.num_procs, true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
                           "OMPI_MCA_ns_nds_num_procs", orted_globals.num_procs, ret);
            return ret;
        }
    }
    if (orted_globals.ns_nds) {
        if (ORTE_SUCCESS != (ret = opal_setenv("OMPI_MCA_ns_nds",
                                               orted_globals.ns_nds, true, &environ))) {
            opal_show_help("help-orted.txt", "orted:environ", false,
                           "OMPI_MCA_ns_nds", "env", ret);
            return ret;
        }
    }

    /* turn on debug if debug_file is requested so output will be generated */
    if (orted_globals.debug_daemons_file) {
        orted_globals.debug_daemons = true;
    }

    /* detach from controlling terminal
     * otherwise, remain attached so output can get to us
     */
    if(orted_globals.debug == false &&
       orted_globals.debug_daemons == false &&
       orted_globals.no_daemonize == false) {
        opal_daemon_init(NULL);
    }

    /* Intialize the Open RTE */
    /* Set the flag telling orte_init that I am NOT a
     * singleton, but am "infrastructure" - prevents setting
     * up incorrect infrastructure that only a singleton would
     * require
     */
    if (ORTE_SUCCESS != (ret = orte_init(true))) {
        opal_show_help("help-orted.txt", "orted:init-failure", false,
                       "orte_init()", ret);
        return ret;
    }

    /* Set signal handlers to catch kill signals so we can properly clean up
     * after ourselves. 
     */
    opal_event_set(&term_handler, SIGTERM, OPAL_EV_SIGNAL,
                   signal_callback, NULL);
    opal_event_add(&term_handler, NULL);
    opal_event_set(&int_handler, SIGINT, OPAL_EV_SIGNAL,
                   signal_callback, NULL);
    opal_event_add(&int_handler, NULL);

    /* if requested, report my uri to the indicated pipe */
    if (orted_globals.uri_pipe > 0) {
        write(orted_globals.uri_pipe, orte_universe_info.seed_uri,
                    strlen(orte_universe_info.seed_uri)+1); /* need to add 1 to get the NULL */
        close(orted_globals.uri_pipe);
    }

    /* setup stdout/stderr */
    if (orted_globals.debug_daemons_file) {
        /* if we are debugging to a file, then send stdout/stderr to
         * the orted log file
         */

        /* get my jobid */
        if (ORTE_SUCCESS != (ret = orte_ns.get_jobid_string(&jobidstring,
                                        orte_process_info.my_name))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* define a log file name in the session directory */
        sprintf(log_file, "output-orted-%s-%s.log",
                jobidstring, orte_system_info.nodename);
        log_path = opal_os_path(false,
                                orte_process_info.tmpdir_base,
                                orte_process_info.top_session_dir,
                                log_file,
                                NULL);

        fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640);
        if (fd < 0) {
            /* couldn't open the file for some reason, so
             * just connect everything to /dev/null
             */
             fd = open("/dev/null", O_RDWR|O_CREAT|O_TRUNC, 0666);
        } else {
            dup2(fd, STDOUT_FILENO);
            dup2(fd, STDERR_FILENO);
            if(fd != STDOUT_FILENO && fd != STDERR_FILENO) {
               close(fd);
            }
        }
    }

    /* output a message indicating we are alive, our name, and our pid
     * for debugging purposes
     */
    if (orted_globals.debug_daemons) {
        fprintf(stderr, "Daemon [%ld,%ld,%ld] checking in as pid %ld on host %s\n",
                ORTE_NAME_ARGS(orte_process_info.my_name), (long)orte_process_info.pid,
                orte_system_info.nodename);
    }

    /* setup the thread lock and condition variables */
    OBJ_CONSTRUCT(&orted_globals.mutex, opal_mutex_t);
    OBJ_CONSTRUCT(&orted_globals.condition, opal_condition_t);

    /* register the daemon main receive functions */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED, ORTE_RML_NON_PERSISTENT, orte_daemon_recv_pls, NULL);
    if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON, ORTE_RML_NON_PERSISTENT, orte_daemon_recv, NULL);
    if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    /* check to see if I'm a bootproxy */
    if (orted_globals.bootproxy) { /* perform bootproxy-specific things */
        if (orted_globals.mpi_call_yield > 0) {
            char *var;
            var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
            opal_setenv(var, "1", true, &environ);
        }

        /* attach a subscription to the orted standard trigger so I can get
         * information on the processes I am to locally launch as soon as all
         * the orteds for this job are started.
         *
         * Once the registry gets to 2.0, we will be able to setup the
         * subscription so we only get our own launch info back. In the interim,
         * we setup the subscription so that ALL launch info for this job
         * is returned. We will then have to parse that message to get our
         * own local launch info.
         *
         * Since we have chosen this approach, we can take advantage of the
         * fact that the callback function will directly receive this data.
         * By setting up that callback function to actually perform the launch
         * based on the received data, all we have to do here is go into our
         * conditioned wait until the job completes!
         *
         * Sometimes, life can be good! :-)
         */

        /** put all this registry stuff in a compound command to limit communications */
        if (ORTE_SUCCESS != (ret = orte_gpr.begin_compound_cmd())) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* let the local launcher setup a subscription for its required data. We
         * pass the local_cb_launcher function so that this gets called back - this
         * allows us to wakeup the orted so it can exit cleanly if the callback
         * generates an error
         */
        if (ORTE_SUCCESS != (ret = orte_odls.subscribe_launch_data(orted_globals.bootproxy, orted_local_cb_launcher))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* get the job segment name */
        if (ORTE_SUCCESS != (ret = orte_schema.get_job_segment_name(&segment, orted_globals.bootproxy))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

       /** increment the orted stage gate counter */
        if (ORTE_SUCCESS != (ret = orte_gpr.create_value(&value, ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_AND,
                                                         segment, 1, 1))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
        free(segment); /* done with this now */

        value->tokens[0] = strdup(ORTE_JOB_GLOBALS);
        if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(value->keyvals[0]), ORTED_LAUNCH_STAGE_GATE_CNTR, ORTE_UNDEF, NULL))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* do the increment */
        if (ORTE_SUCCESS != (ret = orte_gpr.increment_value(value))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
        OBJ_RELEASE(value);  /* done with this now */

        /** send the compound command */
        if (ORTE_SUCCESS != (ret = orte_gpr.exec_compound_cmd())) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }

        /* setup and enter the event monitor to wait for a wakeup call */
        OPAL_THREAD_LOCK(&orted_globals.mutex);
        while (false == orted_globals.exit_condition) {
            opal_condition_wait(&orted_globals.condition, &orted_globals.mutex);
        }
        OPAL_THREAD_UNLOCK(&orted_globals.mutex);

        /* make sure our local procs are dead - but don't update their state
         * on the HNP as this may be redundant
         */
        orte_odls.kill_local_procs(ORTE_JOBID_WILDCARD, false);

        /* cleanup their session directory */
        orte_session_dir_cleanup(orted_globals.bootproxy);

        /* send an ack - we are as close to done as we can be while
         * still able to communicate
         */
        OBJ_CONSTRUCT(&answer, orte_buffer_t);
        if (0 > orte_rml.send_buffer(ORTE_PROC_MY_HNP, &answer, ORTE_RML_TAG_PLS_ORTED_ACK, 0)) {
            ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        }
        OBJ_DESTRUCT(&answer);


        /* Finalize and clean up ourselves */
        if (ORTE_SUCCESS != (ret = orte_finalize())) {
            ORTE_ERROR_LOG(ret);
        }
        exit(ret);
    }

    /*
     *  Set my process status to "running". Note that this must be done
     *  after the rte init is completed.
     */
    if (ORTE_SUCCESS != (ret = orte_smr.set_proc_state(orte_process_info.my_name,
                                                     ORTE_PROC_STATE_RUNNING, 0))) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    if (orted_globals.debug_daemons) {
        opal_output(0, "[%lu,%lu,%lu] ompid: issuing callback", ORTE_NAME_ARGS(orte_process_info.my_name));
    }

   /* go through the universe fields and see what else I need to do
     * - could be setup a virtual machine, spawn a console, etc.
     */

    if (orted_globals.debug_daemons) {
        opal_output(0, "[%lu,%lu,%lu] ompid: setting up event monitor", ORTE_NAME_ARGS(orte_process_info.my_name));
    }

     /* setup and enter the event monitor */
    OPAL_THREAD_LOCK(&orted_globals.mutex);

    while (false == orted_globals.exit_condition) {
        opal_condition_wait(&orted_globals.condition, &orted_globals.mutex);
    }

    OPAL_THREAD_UNLOCK(&orted_globals.mutex);

    if (orted_globals.debug_daemons) {
       opal_output(0, "[%lu,%lu,%lu] orted: mutex cleared - finalizing", ORTE_NAME_ARGS(orte_process_info.my_name));
    }

    /* cleanup */
    if (NULL != log_path) {
        unlink(log_path);
    }

    /* finalize the system */
    orte_finalize();

    if (orted_globals.debug_daemons) {
       opal_output(0, "[%lu,%lu,%lu] orted: done - exiting", ORTE_NAME_ARGS(orte_process_info.my_name));
    }

    exit(0);
}
Beispiel #27
0
/*
 * Function for selecting one component from all those that are
 * available.
 */
void orte_ras_base_allocate(int fd, short args, void *cbdata)
{
    int rc;
    orte_job_t *jdata;
    opal_list_t nodes;
    orte_node_t *node;
    orte_std_cntr_t i;
    orte_app_context_t *app;
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;

    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* convenience */
    jdata = caddy->jdata;

    /* if we already did this, don't do it again - the pool of
     * global resources is set. 
     */
    if (orte_ras_base.allocation_read) {
        
        OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                             "%s ras:base:allocate allocation already read",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        goto next_state;
    }
    orte_ras_base.allocation_read = true;

    /* Otherwise, we have to create
     * the initial set of resources that will delineate all
     * further operations serviced by this HNP. This list will
     * contain ALL nodes that can be used by any subsequent job.
     *
     * In other words, if a node isn't found in this step, then
     * no job launched by this HNP will be able to utilize it.
     */
    
    /* construct a list to hold the results */
    OBJ_CONSTRUCT(&nodes, opal_list_t);

    /* if a component was selected, then we know we are in a managed
     * environment.  - the active module will return a list of what it found
     */
    if (NULL != orte_ras_base.active_module)  {
        /* read the allocation */
        if (ORTE_SUCCESS != (rc = orte_ras_base.active_module->allocate(jdata, &nodes))) {
            if (ORTE_ERR_ALLOCATION_PENDING == rc) {
                /* an allocation request is underway, so just do nothing */
                OBJ_DESTRUCT(&nodes);
                OBJ_RELEASE(caddy);
                return;
            }
            if (ORTE_ERR_SYSTEM_WILL_BOOTSTRAP == rc) {
                /* this module indicates that nodes will be discovered
                 * on a bootstrap basis, so all we do here is add our
                 * own node to the list
                 */
                goto addlocal;
            }
            if (ORTE_ERR_TAKE_NEXT_OPTION == rc) {
                /* we have an active module, but it is unable to
                 * allocate anything for this job - this indicates
                 * that it isn't a fatal error, but could be if
                 * an allocation is required
                 */
                if (orte_allocation_required) {
                    /* an allocation is required, so this is fatal */
                    OBJ_DESTRUCT(&nodes);
                    orte_show_help("help-ras-base.txt", "ras-base:no-allocation", true);
                    ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
                    OBJ_RELEASE(caddy);
                    return;
                } else {
                    /* an allocation is not required, so we can just
                     * run on the local node - go add it
                     */
                    goto addlocal;
                }
            }
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
    } 
    /* If something came back, save it and we are done */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        OBJ_DESTRUCT(&nodes);
        /* default to no-oversubscribe-allowed for managed systems */
        if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        }
        /* flag that the allocation is managed */
        orte_managed_allocation = true;
        goto DISPLAY;
    } else if (orte_allocation_required) {
        /* if nothing was found, and an allocation is
         * required, then error out
         */
        OBJ_DESTRUCT(&nodes);
        orte_show_help("help-ras-base.txt", "ras-base:no-allocation", true);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(caddy);
        return;
    }
    
    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate nothing found in module - proceeding to hostfile",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* nothing was found, or no active module was alive. Our next
     * option is to look for a hostfile and assign our global
     * pool from there.
     *
     * Individual hostfile names, if given, are included
     * in the app_contexts for this job. We therefore need to
     * retrieve the app_contexts for the job, and then cycle
     * through them to see if anything is there. The parser will
     * add the nodes found in each hostfile to our list - i.e.,
     * the resulting list contains the UNION of all nodes specified
     * in hostfiles from across all app_contexts
     *
     * We then continue to add any hosts provided by dash-host and
     * the default hostfile, if we have it. We will then filter out
     * all the non-desired hosts (i.e., those not specified by
     * -host and/or -hostfile) when we start the mapping process
     *
     * Note that any relative node syntax found in the hostfiles will
     * generate an error in this scenario, so only non-relative syntax
     * can be present
     */
    if (NULL != orte_default_hostfile) {
        OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                             "%s ras:base:allocate parsing default hostfile %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             orte_default_hostfile));
        
        /* a default hostfile was provided - parse it */
        if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                               orte_default_hostfile))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
    }
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
        if (NULL != app->hostfile) {
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                 "%s ras:base:allocate adding hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 app->hostfile));
            
            /* hostfile was specified - parse it and add it to the list */
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                                   app->hostfile))) {
                ORTE_ERROR_LOG(rc);
                OBJ_DESTRUCT(&nodes);
                /* set an error event */
                ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
                OBJ_RELEASE(caddy);
                return;
            }
        } else if (!orte_soft_locations && NULL != app->dash_host) {
            /* if we are using soft locations, then any dash-host would
             * just include desired nodes and not required. We don't want
             * to pick them up here as this would mean the request was
             * always satisfied - instead, we want to allow the request
             * to fail later on and use whatever nodes are actually
             * available
             */
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                 "%s ras:base:allocate adding dash_hosts",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes,
                                                                    app->dash_host))) {
                ORTE_ERROR_LOG(rc);
                OBJ_DESTRUCT(&nodes);
                ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
                OBJ_RELEASE(caddy);
                return;
            }
        }
    }

    /* if something was found in the hostfile(s), we use that as our global
     * pool - set it and we are done
     */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        /* cleanup */
        OBJ_DESTRUCT(&nodes);
        goto DISPLAY;
    }
    
    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate nothing found in hostfiles - checking for rankfile",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* Our next option is to look for a rankfile - if one was provided, we
     * will use its nodes to create a default allocation pool
     */
    if (NULL != orte_rankfile) {
        /* check the rankfile for node information */
        if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                               orte_rankfile))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return ;
        }
    }
    /* if something was found in rankfile, we use that as our global
     * pool - set it and we are done
     */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        /* rankfile is considered equivalent to an RM allocation */
        if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        }
        /* cleanup */
        OBJ_DESTRUCT(&nodes);
        goto DISPLAY;
    }
    
    
    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate nothing found in rankfile - inserting current node",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
 addlocal:
    /* if nothing was found by any of the above methods, then we have no
     * earthly idea what to do - so just add the local host
     */
    node = OBJ_NEW(orte_node_t);
    if (NULL == node) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        OBJ_DESTRUCT(&nodes);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(caddy);
        return;
    }
    /* use the same name we got in orte_process_info so we avoid confusion in
     * the session directories
     */
    node->name = strdup(orte_process_info.nodename);
    node->state = ORTE_NODE_STATE_UP;
    node->slots_inuse = 0;
    node->slots_max = 0;
    node->slots = 1;
    opal_list_append(&nodes, &node->super);
    
    /* store the results in the global resource pool - this removes the
     * list items
     */
    if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&nodes);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(caddy);
        return;
    }
    OBJ_DESTRUCT(&nodes);

 DISPLAY:
    /* shall we display the results? */
    if (4 < opal_output_get_verbosity(orte_ras_base_framework.framework_output)) {
        orte_ras_base_display_alloc();
    }

 next_state:
    /* are we to report this event? */
    if (orte_report_events) {
        if (ORTE_SUCCESS != (rc = orte_util_comm_report_event(ORTE_COMM_EVENT_ALLOCATE))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
        }
    }
    
    /* set total slots alloc */
    jdata->total_slots_alloc = orte_ras_base.total_slots_alloc;

    /* set the job state to the next position */
    ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOCATION_COMPLETE);

    /* cleanup */
    OBJ_RELEASE(caddy);
}
Beispiel #28
0
static void orte_daemon_recv_pls(int status, orte_process_name_t* sender,
                 orte_buffer_t *buffer, orte_rml_tag_t tag,
                 void* cbdata)
{
    orte_daemon_cmd_flag_t command;
    orte_buffer_t answer;
    int ret;
    orte_std_cntr_t n;
    int32_t signal;
    orte_gpr_notify_data_t *ndat;
    orte_jobid_t job;

    OPAL_TRACE(1);

    OPAL_THREAD_LOCK(&orted_globals.mutex);

    if (orted_globals.debug_daemons) {
       opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received message from [%ld,%ld,%ld]",
                   ORTE_NAME_ARGS(orte_process_info.my_name),
                   ORTE_NAME_ARGS(sender));
    }

    /* unpack the command */
    n = 1;
    if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &command, &n, ORTE_DAEMON_CMD))) {
        ORTE_ERROR_LOG(ret);
        goto CLEANUP;
    }
    
    switch(command) {

        /****    KILL_LOCAL_PROCS   ****/
        case ORTE_DAEMON_KILL_LOCAL_PROCS:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received kill_local_procs",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* unpack the jobid - could be JOBID_WILDCARD, which would indicatge
             * we should kill all local procs. Otherwise, only kill those within
             * the specified jobid
             */
            n = 1;
            if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &job, &n, ORTE_JOBID))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }

            if (ORTE_SUCCESS != (ret = orte_odls.kill_local_procs(job, true))) {
                ORTE_ERROR_LOG(ret);
            }
            break;
            
        /****    SIGNAL_LOCAL_PROCS   ****/
        case ORTE_DAEMON_SIGNAL_LOCAL_PROCS:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received signal_local_procs",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* get the signal */
            n = 1;
            if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &signal, &n, ORTE_INT32))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }
                
            /* see if they specified a process to signal, or if we
             * should just signal them all
             *
             * NOTE: FOR NOW, WE JUST SIGNAL ALL CHILDREN
             */

            if (ORTE_SUCCESS != (ret = orte_odls.signal_local_procs(NULL, signal))) {
                ORTE_ERROR_LOG(ret);
            }
            break;

            /****    ADD_LOCAL_PROCS   ****/
        case ORTE_DAEMON_ADD_LOCAL_PROCS:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received add_local_procs",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* unpack the notify data object */
            n = 1;
            if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &ndat, &n, ORTE_GPR_NOTIFY_DATA))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }
            
            /* launch the processes */
            if (ORTE_SUCCESS != (ret = orte_odls.launch_local_procs(ndat, orted_globals.saved_environ))) {
                ORTE_ERROR_LOG(ret);
            }

            /* cleanup the memory */
            OBJ_RELEASE(ndat);
            break;
           
            /****    EXIT COMMAND    ****/
        case ORTE_DAEMON_EXIT_CMD:
            if (orted_globals.debug_daemons) {
                opal_output(0, "[%lu,%lu,%lu] orted_recv_pls: received exit",
                            ORTE_NAME_ARGS(orte_process_info.my_name));
            }
            /* no response to send here - we'll send it when nearly exit'd */
            orted_globals.exit_condition = true;
            opal_condition_signal(&orted_globals.condition);
            OPAL_THREAD_UNLOCK(&orted_globals.mutex);
            return;
            break;

        default:
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
            break;
    }

 CLEANUP:
    /* send an ack that command is done */
    OBJ_CONSTRUCT(&answer, orte_buffer_t);
    if (0 > orte_rml.send_buffer(sender, &answer, ORTE_RML_TAG_PLS_ORTED_ACK, 0)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
    }
    OBJ_DESTRUCT(&answer);
    
    OPAL_THREAD_UNLOCK(&orted_globals.mutex);

    /* reissue the non-blocking receive */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLS_ORTED, ORTE_RML_NON_PERSISTENT, orte_daemon_recv_pls, NULL);
    if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(ret);
    }

    return;
}
int orte_register_params(void)
{
    int id;
    opal_output_stream_t lds;

    /* only go thru this once - mpirun calls it twice, which causes
     * any error messages to show up twice
     */
    if (passed_thru) {
        return ORTE_SUCCESS;
    }
    passed_thru = true;

    /* get a clean output channel too - need to do this here because
     * we use it below, and orterun and some other tools call this
     * function prior to calling orte_init
     */
    OBJ_CONSTRUCT(&lds, opal_output_stream_t);
    lds.lds_want_stdout = true;
    orte_clean_output = opal_output_open(&lds);
    OBJ_DESTRUCT(&lds);

    orte_help_want_aggregate = true;
    (void) mca_base_var_register ("orte", "orte", "base", "help_aggregate",
                                  "If orte_base_help_aggregate is true, duplicate help messages will be aggregated rather than displayed individually.  This can be helpful for parallel jobs that experience multiple identical failures; rather than print out the same help/failure message N times, display it once with a count of how many processes sent the same message.",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_help_want_aggregate);

    /* LOOK FOR A TMP DIRECTORY BASE */
    /* Several options are provided to cover a range of possibilities:
     *
     * (a) all processes need to use a specified location as the base
     *     for tmp directories
     * (b) daemons on remote nodes need to use a specified location, but
     *     one different from that used by mpirun
     * (c) mpirun needs to use a specified location, but one different
     *     from that used on remote nodes
     */
    orte_tmpdir_base = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "tmpdir_base",
                                  "Base of the session directory tree to be used by all processes",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_tmpdir_base);

    orte_local_tmpdir_base = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "local_tmpdir_base",
                                  "Base of the session directory tree to be used by orterun/mpirun",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_local_tmpdir_base);

    orte_remote_tmpdir_base = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "remote_tmpdir_base",
                                  "Base of the session directory tree on remote nodes, if required to be different from head node",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_remote_tmpdir_base);

    /* if a global tmpdir was specified, then we do not allow specification
     * of the local or remote values to avoid confusion
     */
    if (NULL != orte_tmpdir_base &&
        (NULL != orte_local_tmpdir_base || NULL != orte_remote_tmpdir_base)) {
        opal_output(orte_clean_output,
                    "------------------------------------------------------------------\n"
                    "The MCA param orte_tmpdir_base was specified, which sets the base\n"
                    "of the temporary directory tree for all procs. However, values for\n"
                    "the local and/or remote tmpdir base were also given. This can lead\n"
                    "to confusion and is therefore not allowed. Please specify either a\n"
                    "global tmpdir base OR a local/remote tmpdir base value\n"
                    "------------------------------------------------------------------");
        exit(1);
    }

    if (NULL != orte_tmpdir_base) {
        if (NULL != orte_process_info.tmpdir_base) {
            free(orte_process_info.tmpdir_base);
        }
        orte_process_info.tmpdir_base = strdup (orte_tmpdir_base);
    } else if (ORTE_PROC_IS_HNP && NULL != orte_local_tmpdir_base) {
        /* orterun will pickup the value for its own use */
        if (NULL != orte_process_info.tmpdir_base) {
            free(orte_process_info.tmpdir_base);
        }
        orte_process_info.tmpdir_base = strdup (orte_local_tmpdir_base);
    } else if (ORTE_PROC_IS_DAEMON && NULL != orte_remote_tmpdir_base) {
        /* orterun will pickup the value and forward it along, but must not
         * use it in its own work. So only a daemon needs to get it, and the
         * daemon will pass it down to its application procs. Note that orterun
         * will pass -its- value to any procs local to it
         */
        if (NULL != orte_process_info.tmpdir_base) {
            free(orte_process_info.tmpdir_base);
        }
        orte_process_info.tmpdir_base = strdup (orte_remote_tmpdir_base);
    }

    orte_top_session_dir = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "top_session_dir",
                                  "Top of the session directory tree for applications",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_top_session_dir);

    if (NULL != orte_top_session_dir) {
         if (NULL != orte_process_info.top_session_dir) {
            free(orte_process_info.top_session_dir);
        }
        orte_process_info.top_session_dir = strdup(orte_top_session_dir);
    }

    orte_jobfam_session_dir = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "jobfam_session_dir",
                                  "The jobfamily session directory for applications",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
                                  &orte_jobfam_session_dir);

    if (NULL != orte_jobfam_session_dir) {
        if (NULL != orte_process_info.jobfam_session_dir) {
            free(orte_process_info.jobfam_session_dir);
        }
        orte_process_info.jobfam_session_dir = strdup(orte_jobfam_session_dir);
    }

    orte_prohibited_session_dirs = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "no_session_dirs",
                                  "Prohibited locations for session directories (multiple locations separated by ',', default=NULL)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_prohibited_session_dirs);

    orte_create_session_dirs = true;
    (void) mca_base_var_register ("orte", "orte", NULL, "create_session_dirs",
                                  "Create session directories",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_create_session_dirs);

    orte_execute_quiet = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "execute_quiet",
                                  "Do not output error and help messages",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_execute_quiet);

    orte_report_silent_errors = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "report_silent_errors",
                                  "Report all errors, including silent ones",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_report_silent_errors);

    orte_debug_flag = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "debug",
                                  "Top-level ORTE debug switch (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_debug_flag);

    orte_debug_verbosity = -1;
    (void) mca_base_var_register ("orte", "orte", NULL, "debug_verbose",
                                  "Verbosity level for ORTE debug messages (default: 1)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_debug_verbosity);

    orte_debug_daemons_file_flag = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "debug_daemons_file",
                                  "Whether want stdout/stderr of daemons to go to a file or not",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_debug_daemons_file_flag);
    /* If --debug-daemons-file was specified, that also implies
       --debug-daemons */
    if (orte_debug_daemons_file_flag) {
        orte_debug_daemons_flag = true;

        /* value can't change */
        (void) mca_base_var_register ("orte", "orte", NULL, "debug_daemons",
                                      "Whether to debug the ORTE daemons or not",
                                      MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                      OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_CONSTANT,
                                      &orte_debug_daemons_flag);
    } else {
        orte_debug_daemons_flag = false;

        (void) mca_base_var_register ("orte", "orte", NULL, "debug_daemons",
                                      "Whether to debug the ORTE daemons or not",
                                      MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                      OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                      &orte_debug_daemons_flag);
    }

    orte_progress_thread_debug_level = -1;
    (void) mca_base_var_register ("orte", "orte", NULL, "progress_thread_debug",
                                  "Debug level for ORTE progress threads",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_progress_thread_debug_level);

    if (0 <= orte_progress_thread_debug_level) {
        orte_progress_thread_debug = opal_output_open(NULL);
        opal_output_set_verbosity(orte_progress_thread_debug,
                                  orte_progress_thread_debug_level);
    }

    /* do we want session output left open? */
    orte_leave_session_attached = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "leave_session_attached",
                                  "Whether applications and/or daemons should leave their sessions "
                                  "attached so that any output can be received - this allows X forwarding "
                                  "without all the attendant debugging output",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_leave_session_attached);

    /* if any debug level is set, ensure we output debug level dumps */
    if (orte_debug_flag || orte_debug_daemons_flag || orte_leave_session_attached) {
        orte_devel_level_output = true;
    }

    /* See comment in orte/tools/orterun/orterun.c about this MCA
       param (this param is internal) */
    orte_in_parallel_debugger = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "in_parallel_debugger",
                                  "Whether the application is being debugged "
                                  "in a parallel debugger (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_in_parallel_debugger);

    orte_debugger_dump_proctable = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "output_debugger_proctable",
                                  "Whether or not to output the debugger proctable after launch (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                  &orte_debugger_dump_proctable);

    orte_debugger_test_daemon = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "debugger_test_daemon",
                                  "Name of the executable to be used to simulate a debugger colaunch (relative or absolute path)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_debugger_test_daemon);

    orte_debugger_test_attach = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "debugger_test_attach",
                                  "Test debugger colaunch after debugger attachment",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_debugger_test_daemon);

    orte_debugger_check_rate = 0;
    (void) mca_base_var_register ("orte", "orte", NULL, "debugger_check_rate",
                                  "Set rate (in secs) for auto-detect of debugger attachment (0 => do not check)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_debugger_check_rate);

    orte_do_not_launch = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "do_not_launch",
                                  "Perform all necessary operations to prepare to launch the application, but do not actually launch it",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_do_not_launch);

    orted_spin_flag = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "daemon_spin",
                                  "Have any orteds spin until we can connect a debugger to them",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orted_spin_flag);

    orted_debug_failure = ORTE_VPID_INVALID;
    (void) mca_base_var_register ("orte", "orte", NULL, "daemon_fail",
                                  "Have the specified orted fail after init for debugging purposes",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orted_debug_failure);

    orted_debug_failure_delay = 0;
    (void) mca_base_var_register ("orte", "orte", NULL, "daemon_fail_delay",
                                  "Have the specified orted fail after specified number of seconds (default: 0 => no delay)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orted_debug_failure_delay);

    orte_startup_timeout = 0;
    (void) mca_base_var_register ("orte", "orte", NULL, "startup_timeout",
                                  "Seconds to wait for startup or job launch before declaring failed_to_start (default: 0 => do not check)",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_startup_timeout);

    /* User-level debugger info string */
    orte_base_user_debugger = "totalview @mpirun@ -a @mpirun_args@ : ddt -n @np@ -start @executable@ @executable_argv@ @single_app@ : fxp @mpirun@ -a @mpirun_args@";
    (void) mca_base_var_register ("orte", "orte", NULL, "base_user_debugger",
                                  "Sequence of user-level debuggers to search for in orterun",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_base_user_debugger);

#if 0
    mca_base_param_reg_int_name("orte", "abort_timeout",
                                "Max time to wait [in secs] before aborting an ORTE operation (default: 1sec)",
                                false, false, 1, &value);
    orte_max_timeout = 1000000.0 * value;  /* convert to usec */

    mca_base_param_reg_int_name("orte", "timeout_step",
                                "Time to wait [in usecs/proc] before aborting an ORTE operation (default: 1000 usec/proc)",
                                false, false, 1000, &orte_timeout_usec_per_proc);
#endif

    /* default hostfile */
    orte_default_hostfile = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "default_hostfile",
                                  "Name of the default hostfile (relative or absolute path, \"none\" to ignore environmental or default MCA param setting)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_default_hostfile);

    if (NULL == orte_default_hostfile) {
        /* nothing was given, so define the default */
        asprintf(&orte_default_hostfile, "%s/openmpi-default-hostfile", opal_install_dirs.sysconfdir);
        /* flag that nothing was given */
        orte_default_hostfile_given = false;
    } else if (0 == strcmp(orte_default_hostfile, "none")) {
        free (orte_default_hostfile);
        orte_default_hostfile = NULL;
        /* flag that it was given */
        orte_default_hostfile_given = true;
    } else {
        /* flag that it was given */
        orte_default_hostfile_given = true;
    }

    /* default dash-host */
    orte_default_dash_host = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "default_dash_host",
                                  "Default -host setting (specify \"none\" to ignore environmental or default MCA param setting)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_default_dash_host);
    if (NULL != orte_default_dash_host &&
        0 == strcmp(orte_default_dash_host, "none")) {
        free(orte_default_dash_host);
        orte_default_dash_host = NULL;
    }

    /* regex of nodes in system */
    orte_node_regex = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "node_regex",
                                  "Regular expression defining nodes in the system",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_node_regex);

    /* whether or not to keep FQDN hostnames */
    orte_keep_fqdn_hostnames = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "keep_fqdn_hostnames",
                                  "Whether or not to keep FQDN hostnames [default: no]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_keep_fqdn_hostnames);

    /* whether or not to retain aliases of hostnames */
    orte_retain_aliases = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "retain_aliases",
                                  "Whether or not to keep aliases for host names [default: no]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_retain_aliases);

    /* which alias to use in MPIR_proctab */
    orte_use_hostname_alias = 1;
    (void) mca_base_var_register ("orte", "orte", NULL, "hostname_alias_index",
                                  "If hostname aliases are being retained, which one to use for the debugger proc table [default: 1st alias]",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_use_hostname_alias);

    orte_xml_output = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "xml_output",
                                  "Display all output in XML format (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_xml_output);

    /* whether to tag output */
    /* if we requested xml output, be sure to tag the output as well */
    orte_tag_output = orte_xml_output;
    (void) mca_base_var_register ("orte", "orte", NULL, "tag_output",
                                  "Tag all output with [job,rank] (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_tag_output);
    if (orte_xml_output) {
        orte_tag_output = true;
    }


    orte_xml_file = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "xml_file",
                                  "Provide all output in XML format to the specified file",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_xml_file);
    if (NULL != orte_xml_file) {
        if (ORTE_PROC_IS_HNP && NULL == orte_xml_fp) {
            /* only the HNP opens this file! Make sure it only happens once */
            orte_xml_fp = fopen(orte_xml_file, "w");
            if (NULL == orte_xml_fp) {
                opal_output(0, "Could not open specified xml output file: %s", orte_xml_file);
                return ORTE_ERROR;
            }
        }
        /* ensure we set the flags to tag output */
        orte_xml_output = true;
        orte_tag_output = true;
    } else {
        /* default to stdout */
        orte_xml_fp = stdout;
    }

    /* whether to timestamp output */
    orte_timestamp_output = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "timestamp_output",
                                  "Timestamp all application process output (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_timestamp_output);

    /* redirect output into files */
    orte_output_filename = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "output_filename",
                                  "Redirect output from application processes into filename.rank [default: NULL]",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_output_filename);

    orte_show_resolved_nodenames = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "show_resolved_nodenames",
                                  "Display any node names that are resolved to a different name (default: false)",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_show_resolved_nodenames);

#if 0
    /* XXX -- option doesn't appear to do anything */
    mca_base_param_reg_int_name("orte", "hetero_apps",
                                "Indicates that multiple app_contexts are being provided that are a mix of 32/64 bit binaries (default: false)",
                                false, false, (int) false, &value);
    orte_hetero_apps = OPAL_INT_TO_BOOL(value);
#endif

    orte_hetero_nodes = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "hetero_nodes",
                                  "Nodes in cluster may differ in topology, so send the topology back from each node [Default = false]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_hetero_nodes);

    /* allow specification of the launch agent */
    orte_launch_agent = "orted";
    (void) mca_base_var_register ("orte", "orte", NULL, "launch_agent",
                                  "Command used to start processes on remote nodes (default: orted)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_launch_agent);

    orte_fork_agent_string = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "fork_agent",
                                  "Command used to fork processes on remote nodes (default: NULL)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_fork_agent_string);

    if (NULL != orte_fork_agent_string) {
        orte_fork_agent = opal_argv_split(orte_fork_agent_string, ' ');
    }

    /* whether or not to require RM allocation */
    orte_allocation_required = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "allocation_required",
                                  "Whether or not an allocation by a resource manager is required [default: no]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_allocation_required);

    /* whether or not to map stddiag to stderr */
    orte_map_stddiag_to_stderr = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "map_stddiag_to_stderr",
                                  "Map output from opal_output to stderr of the local process [default: no]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_map_stddiag_to_stderr);

    /* generate new terminal windows to display output from specified ranks */
    orte_xterm = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "xterm",
                                  "Create a new xterm window and display output from the specified ranks there [default: none]",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_xterm);
    if (NULL != orte_xterm) {
        /* if an xterm request is given, we have to leave any ssh
         * sessions attached so the xterm window manager can get
         * back to the controlling terminal
         */
        orte_leave_session_attached = true;
        /* also want to redirect stddiag output from opal_output
         * to stderr from the process so those messages show
         * up in the xterm window instead of being forwarded to mpirun
         */
        orte_map_stddiag_to_stderr = true;
    }

    /* whether or not to report launch progress */
    orte_report_launch_progress = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "report_launch_progress",
                                  "Output a brief periodic report on launch progress [default: no]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_report_launch_progress);

    /* cluster hardware info detected by orte only */
    orte_local_cpu_type = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "cpu_type",
                                  "cpu type detected in node",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_local_cpu_type);

    orte_local_cpu_model = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "cpu_model",
                                  "cpu model detected in node",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_local_cpu_model);

    /* tool communication controls */
    orte_report_events_uri = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "report_events",
                                  "URI to which events are to be reported (default: NULL)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_report_events_uri);
    if (NULL != orte_report_events_uri) {
        orte_report_events = true;
    }

    /* barrier control */
    orte_do_not_barrier = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "do_not_barrier",
                                  "Do not barrier in orte_init",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_do_not_barrier);

    orte_enable_recovery = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "enable_recovery",
                                  "Enable recovery from process failure [Default = disabled]",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_enable_recovery);

    orte_max_restarts = 0;
    (void) mca_base_var_register ("orte", "orte", NULL, "max_restarts",
                                  "Max number of times to restart a failed process",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_max_restarts);

    if (!orte_enable_recovery && orte_max_restarts != 0) {
        if (ORTE_PROC_IS_HNP) {
            opal_output(orte_clean_output,
                        "------------------------------------------------------------------\n"
                        "The MCA param orte_enable_recovery was not set to true, but\n"
                        "a value was provided for the number of restarts:\n\n"
                        "Max restarts: %d\n"
                        "We are enabling process recovery and continuing execution. To avoid\n"
                        "this warning in the future, please set the orte_enable_recovery\n"
                        "param to non-zero.\n"
                        "------------------------------------------------------------------",
                        orte_max_restarts);
        }
        orte_enable_recovery = true;
    }

    orte_abort_non_zero_exit = true;
    (void) mca_base_var_register ("orte", "orte", NULL, "abort_on_non_zero_status",
                                  "Abort the job if any process returns a non-zero exit status - no restart in such cases",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_abort_non_zero_exit);

    orte_allowed_exit_without_sync = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "allowed_exit_without_sync",
                                  "Process exiting without calling finalize will not trigger job termination",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_allowed_exit_without_sync);

    orte_staged_execution = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "staged_execution",
                                  "Staged execution is being used",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_staged_execution);

    orte_report_child_jobs_separately = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "report_child_jobs_separately",
                                  "Return the exit status of the primary job only",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_report_child_jobs_separately);


#if 0
    /* XXX -- unused parameter */
    mca_base_param_reg_int_name("orte", "child_time_to_exit",
                                "Max time a spawned child job is allowed to run after the primary job has terminated (seconds)",
                                false, false,
                                INT_MAX, &value);
    orte_child_time_to_exit.tv_sec = value;
    orte_child_time_to_exit.tv_usec = 0;
#endif

    orte_stat_history_size = 1;
    (void) mca_base_var_register ("orte", "orte", NULL, "stat_history_size",
                                  "Number of stat samples to keep",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_stat_history_size);

    orte_max_vm_size = -1;
    (void) mca_base_var_register ("orte", "orte", NULL, "max_vm_size",
                                  "Maximum size of virtual machine - used to subdivide allocation",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_max_vm_size);

    if (opal_hwloc_use_hwthreads_as_cpus) {
        orte_set_slots = "hwthreads";
    } else {
        orte_set_slots = "cores";
    }
    (void) mca_base_var_register ("orte", "orte", NULL, "set_default_slots",
                                  "Set the number of slots on nodes that lack such info to the"
                                  " number of specified objects [a number, \"cores\" (default),"
                                  " \"numas\", \"sockets\", \"hwthreads\" (default if hwthreads_as_cpus is set),"
                                  " or \"none\" to skip this option]",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_set_slots);

    /* should we display the allocation after determining it? */
    orte_display_allocation = false;
    id = mca_base_var_register ("orte", "orte", NULL, "display_alloc",
                                "Whether to display the allocation after it is determined",
                                MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                &orte_display_allocation);
    /* register a synonym for old name -- should we remove this now? */
    mca_base_var_register_synonym (id, "orte", "ras", "base", "display_alloc", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

    /* should we display a detailed (developer-quality) version of the allocation after determining it? */
    orte_devel_level_output = false;
    id = mca_base_var_register ("orte", "orte", NULL, "display_devel_alloc",
                                "Whether to display a developer-detail allocation after it is determined",
                                MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                &orte_devel_level_output);
    /* register a synonym for old name -- should we remove this now? */
    mca_base_var_register_synonym (id, "orte", "ras", "base", "display_devel_alloc", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

    if (orte_devel_level_output) {
        orte_display_allocation = true;
    }

    /* should we treat any -host directives as "soft" - i.e., desired
     * but not required
     */
    orte_soft_locations = false;
    (void) mca_base_var_register ("orte", "orte", NULL, "soft_locations",
                                  "Treat -host directives as desired, but not required",
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_soft_locations);

    /* allow specification of the cores to be used by daemons */
    orte_daemon_cores = NULL;
    (void) mca_base_var_register ("orte", "orte", NULL, "daemon_cores",
                                  "Restrict the ORTE daemons (including mpirun) to operate on the specified cores (comma-separated list of ranges)",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                  OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
                                  &orte_daemon_cores);

    /* cutoff for full modex */
    orte_direct_modex_cutoff = UINT32_MAX;
    id = mca_base_var_register ("orte", "orte", NULL, "direct_modex_cutoff",
                                "If the number of processes in the application exceeds the provided value,"
                                "modex will be done upon demand [default: UINT32_MAX]",
                                MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
                                OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                &orte_direct_modex_cutoff);
    /* register a synonym for old name */
    mca_base_var_register_synonym (id, "ompi", "ompi", "hostname", "cutoff", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

    return ORTE_SUCCESS;
}
/**
 * Function for weeding out sensor components that don't want to run.
 *
 * Call the init function on all available components to find out if
 * they want to run.  Select all components that don't fail.  Failing
 * components will be closed and unloaded.  The selected modules will
 * be returned to the caller in a opal_list_t.
 */
int orte_sensor_base_select(void)
{
    mca_base_component_list_item_t *cli = NULL;
    mca_base_component_t *component = NULL;
    mca_base_module_t *module = NULL;
    orte_sensor_active_module_t *i_module;
    opal_list_item_t *item;
    int priority = 0, i, j, low_i;
    opal_pointer_array_t tmp_array;
    bool none_found;
    orte_sensor_active_module_t *tmp_module = NULL, *tmp_module_sw = NULL;
    orte_job_t *jdata;

    if (selected) {
        return ORTE_SUCCESS;
    }
    selected = true;

    OBJ_CONSTRUCT(&tmp_array, opal_pointer_array_t);
    
    opal_output_verbose(10, orte_sensor_base_framework.framework_output,
                        "sensor:base:select: Auto-selecting components");
    
    /*
     * Traverse the list of available components.
     * For each call their 'query' functions to determine relative priority.
     */
    none_found = true;
    for (item  = opal_list_get_first(&orte_sensor_base_framework.framework_components);
         item != opal_list_get_end(&orte_sensor_base_framework.framework_components);
         item  = opal_list_get_next(item) ) {
        cli = (mca_base_component_list_item_t *) item;
        component = (mca_base_component_t *) cli->cli_component;
        
        /*
         * If there is a query function then use it.
         */
        if (NULL == component->mca_query_component) {
            opal_output_verbose(5, orte_sensor_base_framework.framework_output,
                                "sensor:base:select Skipping component [%s]. It does not implement a query function",
                                component->mca_component_name );
            continue;
        }
        
        /*
         * Query this component for the module and priority
         */
        opal_output_verbose(5, orte_sensor_base_framework.framework_output,
                            "sensor:base:select Querying component [%s]",
                            component->mca_component_name);
        
        component->mca_query_component(&module, &priority);
        
        /*
         * If no module was returned or negative priority, then skip component
         */
        if (NULL == module || priority < 0) {
            opal_output_verbose(5, orte_sensor_base_framework.framework_output,
                                "sensor:base:select Skipping component [%s]. Query failed to return a module",
                                component->mca_component_name );
            continue;
        }
        
        /*
         * Append them to the temporary list, we will sort later
         */
        opal_output_verbose(5, orte_sensor_base_framework.framework_output,
                            "sensor:base:select Query of component [%s] set priority to %d", 
                            component->mca_component_name, priority);
        tmp_module = OBJ_NEW(orte_sensor_active_module_t);
        tmp_module->component = component;
        tmp_module->module    = (orte_sensor_base_module_t*)module;
        tmp_module->priority  = priority;
        
        opal_pointer_array_add(&tmp_array, (void*)tmp_module);
        none_found = false;
    }
    
    if (none_found) {
        /* okay for no modules to be found */
        return ORTE_SUCCESS;
    }
    
    /* ensure my_proc and my_node are available on the global arrays */
    if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        orte_sensor_base.my_proc = OBJ_NEW(orte_proc_t);
        orte_sensor_base.my_node = OBJ_NEW(orte_node_t);
    } else {
        if (NULL == (orte_sensor_base.my_proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, ORTE_PROC_MY_NAME->vpid))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        if (NULL == (orte_sensor_base.my_node = orte_sensor_base.my_proc->node)) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        /* protect the objects */
        OBJ_RETAIN(orte_sensor_base.my_proc);
        OBJ_RETAIN(orte_sensor_base.my_node);
    }

    /*
     * Sort the list by decending priority
     */
    priority = 0;
    for(j = 0; j < tmp_array.size; ++j) {
        tmp_module_sw = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, j);
        if( NULL == tmp_module_sw ) {
            continue;
        }
        
        low_i   = -1;
        priority = tmp_module_sw->priority;
        
        for(i = 0; i < tmp_array.size; ++i) {
            tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, i);
            if( NULL == tmp_module ) {
                continue;
            }
            if( tmp_module->priority > priority ) {
                low_i = i;
                priority = tmp_module->priority;
            }
        }
        
        if( low_i >= 0 ) {
            tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, low_i);
            opal_pointer_array_set_item(&tmp_array, low_i, NULL);
            j--; /* Try this entry again, if it is not the lowest */
        } else {
            tmp_module = tmp_module_sw;
            opal_pointer_array_set_item(&tmp_array, j, NULL);
        }
        opal_output_verbose(5, orte_sensor_base_framework.framework_output,
                            "sensor:base:select Add module with priority [%s] %d", 
                            tmp_module->component->mca_component_name, tmp_module->priority);
        opal_pointer_array_add(&orte_sensor_base.modules, tmp_module);
    }
    OBJ_DESTRUCT(&tmp_array);
    
    /*
     * Initialize each of the modules in priority order from
     * highest to lowest
     */
    for(i = 0; i < orte_sensor_base.modules.size; ++i) {
        i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i);
        if( NULL == i_module ) {
            continue;
        }
        if( NULL != i_module->module->init ) {
            if (ORTE_SUCCESS != i_module->module->init()) {
                /* can't run after all */
                opal_pointer_array_set_item(&orte_sensor_base.modules, i, NULL);
            }
        }
    }
    
    return ORTE_SUCCESS;
}