int
opal_installdirs_base_open(void)
{
    int i, ret;
    mca_base_component_list_item_t *cli;

    OBJ_CONSTRUCT(&opal_installdirs_components, opal_list_t);
    for (i = 0 ; mca_installdirs_base_static_components[i] != NULL ; ++i) {
        opal_installdirs_base_component_t *component = 
            (opal_installdirs_base_component_t*) 
            mca_installdirs_base_static_components[i];

        /* Save it in a global list for ompi_info */
        cli = OBJ_NEW(mca_base_component_list_item_t);
        cli->cli_component = mca_installdirs_base_static_components[i];
        opal_list_append(&opal_installdirs_components, 
                         &cli->super);

        if (NULL != component->component.mca_open_component) {
            ret = component->component.mca_open_component();
            if (OPAL_SUCCESS != ret) continue;
        }

        /* copy over the data, if something isn't already there */
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, 
                         prefix);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data,
                         exec_prefix);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data,
                         bindir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data,
                         sbindir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data,
                         libexecdir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data,
                         datarootdir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data,
                         datadir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, 
                         sysconfdir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, 
                         sharedstatedir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, 
                         localstatedir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, 
                         libdir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, 
                         includedir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, 
                         infodir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, 
                         mandir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data,
                         pkgdatadir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, 
                         pkglibdir);
        CONDITIONAL_COPY(opal_install_dirs, component->install_dirs_data, 
                         pkgincludedir);
    }

    /* expand out all the fields */
    opal_install_dirs.prefix = 
        opal_install_dirs_expand(opal_install_dirs.prefix);
    opal_install_dirs.exec_prefix = 
        opal_install_dirs_expand(opal_install_dirs.exec_prefix);
    opal_install_dirs.bindir = 
        opal_install_dirs_expand(opal_install_dirs.bindir);
    opal_install_dirs.sbindir = 
        opal_install_dirs_expand(opal_install_dirs.sbindir);
    opal_install_dirs.libexecdir = 
        opal_install_dirs_expand(opal_install_dirs.libexecdir);
    opal_install_dirs.datarootdir = 
        opal_install_dirs_expand(opal_install_dirs.datarootdir);
    opal_install_dirs.datadir = 
        opal_install_dirs_expand(opal_install_dirs.datadir);
    opal_install_dirs.sysconfdir = 
        opal_install_dirs_expand(opal_install_dirs.sysconfdir);
    opal_install_dirs.sharedstatedir = 
        opal_install_dirs_expand(opal_install_dirs.sharedstatedir);
    opal_install_dirs.localstatedir = 
        opal_install_dirs_expand(opal_install_dirs.localstatedir);
    opal_install_dirs.libdir = 
        opal_install_dirs_expand(opal_install_dirs.libdir);
    opal_install_dirs.includedir = 
        opal_install_dirs_expand(opal_install_dirs.includedir);
    opal_install_dirs.infodir = 
        opal_install_dirs_expand(opal_install_dirs.infodir);
    opal_install_dirs.mandir = 
        opal_install_dirs_expand(opal_install_dirs.mandir);
    opal_install_dirs.pkgdatadir = 
        opal_install_dirs_expand(opal_install_dirs.pkgdatadir);
    opal_install_dirs.pkglibdir = 
        opal_install_dirs_expand(opal_install_dirs.pkglibdir);
    opal_install_dirs.pkgincludedir = 
        opal_install_dirs_expand(opal_install_dirs.pkgincludedir);

#if 0
    fprintf(stderr, "prefix:         %s\n", opal_install_dirs.prefix);
    fprintf(stderr, "exec_prefix:    %s\n", opal_install_dirs.exec_prefix);
    fprintf(stderr, "bindir:         %s\n", opal_install_dirs.bindir);
    fprintf(stderr, "sbindir:        %s\n", opal_install_dirs.sbindir);
    fprintf(stderr, "libexecdir:     %s\n", opal_install_dirs.libexecdir);
    fprintf(stderr, "datarootdir:    %s\n", opal_install_dirs.datarootdir);
    fprintf(stderr, "datadir:        %s\n", opal_install_dirs.datadir);
    fprintf(stderr, "sysconfdir:     %s\n", opal_install_dirs.sysconfdir);
    fprintf(stderr, "sharedstatedir: %s\n", opal_install_dirs.sharedstatedir);
    fprintf(stderr, "localstatedir:  %s\n", opal_install_dirs.localstatedir);
    fprintf(stderr, "libdir:         %s\n", opal_install_dirs.libdir);
    fprintf(stderr, "includedir:     %s\n", opal_install_dirs.includedir);
    fprintf(stderr, "infodir:        %s\n", opal_install_dirs.infodir);
    fprintf(stderr, "mandir:         %s\n", opal_install_dirs.mandir);
    fprintf(stderr, "pkgdatadir:     %s\n", opal_install_dirs.pkgdatadir);
    fprintf(stderr, "pkglibdir:      %s\n", opal_install_dirs.pkglibdir);
    fprintf(stderr, "pkgincludedir:  %s\n", opal_install_dirs.pkgincludedir);
#endif

    for (i = 0 ; mca_installdirs_base_static_components[i] != NULL ; ++i) {
        if (NULL !=  mca_installdirs_base_static_components[i]->mca_close_component) {
            mca_installdirs_base_static_components[i]->mca_close_component();
        }
    }

    return OPAL_SUCCESS;
}
/**
 * Function for weeding out btl components that don't want to run.
 *
 * Call the init function on all available components to find out if
 * they want to run.  Select all components that don't fail.  Failing
 * components will be closed and unloaded.  The selected modules will
 * be returned to the caller in a opal_list_t.
 */
int mca_btl_base_select(bool enable_progress_threads,
                        bool enable_mpi_threads)
{
    int i, num_btls;
    opal_list_item_t *item;
    mca_base_component_list_item_t *cli;
    mca_btl_base_component_t *component;
    mca_btl_base_module_t **modules;
    mca_btl_base_selected_module_t *sm;

    char** include = opal_argv_split(mca_btl_base_include, ',');
    char** exclude = opal_argv_split(mca_btl_base_exclude, ',');

    /* Traverse the list of opened modules; call their init
       functions. */

    item  = opal_list_get_first(&mca_btl_base_components_opened);
    while(item != opal_list_get_end(&mca_btl_base_components_opened)) {
        opal_list_item_t *next = opal_list_get_next(item);
        cli = (mca_base_component_list_item_t *) item;

        component = (mca_btl_base_component_t *) cli->cli_component;

        /* if there is an include list - item must be in the list to be included */
        if ( NULL != include ) {
            char** argv = include; 
            bool found = false;
            while(argv && *argv) {
                if(strcmp(component->btl_version.mca_component_name,*argv) == 0) {
                    found = true;
                    break;
                }
                argv++;
            }
            if(found == false) {
                item = next;
                continue;
            }

            /* otherwise - check the exclude list to see if this item has been specifically excluded */
        } else if ( NULL != exclude ) {
            char** argv = exclude; 
            bool found = false;
            while(argv && *argv) {
                if(strcmp(component->btl_version.mca_component_name,*argv) == 0) {
                    found = true;
                    break;
                }
                argv++;
            }
            if(found == true) {
                item = next;
                continue;
            }
        }

        opal_output_verbose(10, mca_btl_base_output, 
                            "select: initializing %s component %s",
                            component->btl_version.mca_type_name,
                            component->btl_version.mca_component_name);
        if (NULL == component->btl_init) {
            opal_output_verbose(10, mca_btl_base_output,
                                "select: no init function; ignoring component %s",
                                component->btl_version.mca_component_name);
        } else {
            modules = component->btl_init(&num_btls, enable_progress_threads,
                                          enable_mpi_threads);

            /* If the component didn't initialize, remove it from the opened
               list and remove it from the component repository */

            if (NULL == modules) {
                opal_output_verbose(10, mca_btl_base_output,
                                    "select: init of component %s returned failure",
                                    component->btl_version.mca_component_name);
                opal_output_verbose(10, mca_btl_base_output,
                                    "select: module %s unloaded",
                                    component->btl_version.mca_component_name);

                mca_base_component_repository_release((mca_base_component_t *) component);
                opal_list_remove_item(&mca_btl_base_components_opened, item);
            } 

            /* Otherwise, it initialized properly.  Save it. */

            else {
                opal_output_verbose(10, mca_btl_base_output,
                                    "select: init of component %s returned success",
                                    component->btl_version.mca_component_name);

                for (i = 0; i < num_btls; ++i) {
                    sm = OBJ_NEW(mca_btl_base_selected_module_t);
                    if (NULL == sm) {
                        return OMPI_ERR_OUT_OF_RESOURCE;
                    }
                    sm->btl_component = component;
                    sm->btl_module = modules[i];
                    opal_list_append(&mca_btl_base_modules_initialized,
                                     (opal_list_item_t*) sm);
                }
                free(modules);
            }
        }
        item = next;
    }

    /* Finished querying all components.  Check for the bozo case. */

    if (0 == opal_list_get_size(&mca_btl_base_modules_initialized)) {
        orte_show_help("help-mca-base.txt", "find-available:none-found", true,
                       "btl");
        orte_errmgr.abort(1, NULL);
    }
    return OMPI_SUCCESS;
}
Example #3
0
static int show_help(const char *filename, const char *topic,
                     const char *output, orte_process_name_t *sender)
{
    int rc;
    tuple_list_item_t *tli = NULL;
    orte_namelist_t *pnli;
    time_t now = time(NULL);

    /* If we're aggregating, check for duplicates.  Otherwise, don't
       track duplicates at all and always display the message. */
    if (orte_help_want_aggregate) {
        rc = get_tli(filename, topic, &tli);
    } else {
        rc = ORTE_ERR_NOT_FOUND;
    }

    /* If there's no output string (i.e., this is a control message
       asking us to suppress), then skip to the end. */
    if (NULL == output) {
        tli->tli_display = false;
        goto after_output;
    }

    /* Was it already displayed? */
    if (ORTE_SUCCESS == rc) {
        /* Yes.  But do we want to print anything?  That's complicated.

           We always show the first message of a given (filename,
           topic) tuple as soon as it arrives.  But we don't want to
           show duplicate notices often, because we could get overrun
           with them.  So we want to gather them up and say "We got N
           duplicates" every once in a while.

           And keep in mind that at termination, we'll unconditionally
           show all accumulated duplicate notices.

           A simple scheme is as follows:
           - when the first of a (filename, topic) tuple arrives
             - print the message
             - if a timer is not set, set T=now
           - when a duplicate (filename, topic) tuple arrives
             - if now>(T+5) and timer is not set (due to
               non-pre-emptiveness of our libevent, a timer *could* be
               set!)
               - print all accumulated duplicates
               - reset T=now
             - else if a timer was not set, set the timer for T+5
             - else if a timer was set, do nothing (just wait)
           - set T=now when the timer expires
        */           
        ++tli->tli_count_since_last_display;
        if (now > show_help_time_last_displayed + 5 && !show_help_timer_set) {
            show_accumulated_duplicates(0, 0, NULL);
        } else if (!show_help_timer_set) {
            opal_event_evtimer_set(orte_event_base, &show_help_timer_event,
                                   show_accumulated_duplicates, NULL);
            opal_event_evtimer_add(&show_help_timer_event, &show_help_interval);
            show_help_timer_set = true;
        }
    } 
    /* Not already displayed */
    else if (ORTE_ERR_NOT_FOUND == rc) {
        if (orte_xml_output) {
            char *tmp;
            tmp = xml_format((unsigned char*)output);
            fprintf(orte_xml_fp, "%s", tmp);
            fflush(orte_xml_fp);
            free(tmp);
        } else {
            opal_output(orte_clean_output, "%s", output);
        }
        if (!show_help_timer_set) {
            show_help_time_last_displayed = now;
        }
    }
    /* Some other error occurred */
    else {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

 after_output:
    /* If we're aggregating, add this process name to the list */
    if (orte_help_want_aggregate) {
        pnli = OBJ_NEW(orte_namelist_t);
        if (NULL == pnli) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        pnli->name = *sender;
        opal_list_append(&(tli->tli_processes), &(pnli->super));
    }
    return ORTE_SUCCESS;
}
static void mca_btl_tcp_event_construct(mca_btl_tcp_event_t* event)
{
    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    opal_list_append(&mca_btl_tcp_component.tcp_events, &event->item);
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
}
Example #5
0
File: db_hash.c Project: IanYXXL/A1
static int store(const opal_identifier_t *uid,
                 opal_scope_t scope,
                 const char *key, const void *data,
                 opal_data_type_t type)
{
    proc_data_t *proc_data;
    opal_value_t *kv;
    opal_byte_object_t *boptr;
    opal_identifier_t id;

    /* data must have an assigned scope */
    if (OPAL_SCOPE_UNDEF == scope) {
        return OPAL_ERR_BAD_PARAM;
    }

    /* to protect alignment, copy the data across */
    memcpy(&id, uid, sizeof(opal_identifier_t));

    /* we are at the bottom of the store priorities, so
     * if this fell to us, we store it
     */
    opal_output_verbose(1, opal_db_base_framework.framework_output,
                        "db:hash:store storing data for proc %" PRIu64 " for scope %d",
                        id, (int)scope);

    /* lookup the proc data object for this proc */
    if (NULL == (proc_data = lookup_opal_proc(&hash_data, id))) {
        /* unrecoverable error */
        OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                             "db:hash:store: storing key %s[%s] for proc %" PRIu64 " unrecoverably failed",
                             key, opal_dss.lookup_data_type(type), id));
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* see if we already have this key in the data - means we are updating
     * a pre-existing value
     */
    kv = lookup_keyval(proc_data, key);
    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:hash:store: %s key %s[%s] for proc %" PRIu64 "",
                         (NULL == kv ? "storing" : "updating"),
                         key, opal_dss.lookup_data_type(type), id));

    if (NULL != kv) {
        opal_list_remove_item(&proc_data->data, &kv->super);
        OBJ_RELEASE(kv);
    }
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(key);
    kv->scope = scope;
    opal_list_append(&proc_data->data, &kv->super);

    /* the type could come in as an OPAL one (e.g., OPAL_VPID). Since
     * the value is an OPAL definition, it cannot cover OPAL data
     * types, so convert to the underlying OPAL type
     */
    switch (type) {
    case OPAL_STRING:
        kv->type = OPAL_STRING;
        if (NULL != data) {
            kv->data.string = strdup( (const char *) data);
        } else {
            kv->data.string = NULL;
        }
        break;
    case OPAL_UINT64:
        if (NULL == data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        kv->type = OPAL_UINT64;
        kv->data.uint64 = *(uint64_t*)(data);
        break;
    case OPAL_UINT32:
        if (NULL == data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        kv->type = OPAL_UINT32;
        kv->data.uint32 = *(uint32_t*)data;
        break;
    case OPAL_UINT16:
        if (NULL == data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        kv->type = OPAL_UINT16;
        kv->data.uint16 = *(uint16_t*)(data);
        break;
    case OPAL_INT:
        if (NULL == data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        kv->type = OPAL_INT;
        kv->data.integer = *(int*)(data);
        break;
    case OPAL_UINT:
        if (NULL == data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        kv->type = OPAL_UINT;
        kv->data.uint = *(unsigned int*)(data);
        break;
    case OPAL_FLOAT:
        if (NULL == data) {
            OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
            return OPAL_ERR_BAD_PARAM;
        }
        kv->type = OPAL_FLOAT;
        kv->data.fval = *(float*)(data);
        break;
    case OPAL_BYTE_OBJECT:
        kv->type = OPAL_BYTE_OBJECT;
        boptr = (opal_byte_object_t*)data;
        if (NULL != boptr && NULL != boptr->bytes && 0 < boptr->size) {
            kv->data.bo.bytes = (uint8_t *) malloc(boptr->size);
            memcpy(kv->data.bo.bytes, boptr->bytes, boptr->size);
            kv->data.bo.size = boptr->size;
        } else {
            kv->data.bo.bytes = NULL;
            kv->data.bo.size = 0;
        }
        break;
    default:
        OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
        return OPAL_ERR_NOT_SUPPORTED;
    }

    return OPAL_SUCCESS;
}
Example #6
0
/* we can only enter this routine if no other allocation
 * was found, so we only need to know that finding any
 * relative node syntax should generate an immediate error
 */
int orte_util_add_dash_host_nodes(opal_list_t *nodes,
                                  bool *override_oversubscribed,
                                  char ** host_argv)
{
    opal_list_item_t* item;
    orte_std_cntr_t i, j, k;
    int rc;
    char **mapped_nodes = NULL, **mini_map;
    orte_node_t *node;

    /* Accumulate all of the host name mappings */
    for (j = 0; j < opal_argv_count(host_argv); ++j) {
        mini_map = opal_argv_split(host_argv[j], ',');
        
        if (mapped_nodes == NULL) {
            mapped_nodes = mini_map;
        } else {
            for (k = 0; NULL != mini_map[k]; ++k) {
                rc = opal_argv_append_nosize(&mapped_nodes, 
                                             mini_map[k]);
                if (OPAL_SUCCESS != rc) {
                    goto cleanup;
                }
            }
            opal_argv_free(mini_map);
        }
    }

    /* Did we find anything? If not, then do nothing */
    if (NULL == mapped_nodes) {
        return ORTE_SUCCESS;
    }
    
    /*  go through the names found and
       add them to the host list. If they're not unique, then
       bump the slots count for each duplicate */
    
    for (i = 0; NULL != mapped_nodes[i]; ++i) {
        /* if the specified node contains a relative node syntax,
         * this is an error
         */
        if ('+' == mapped_nodes[i][0]) {
            orte_show_help("help-dash-host.txt", "dash-host:relative-syntax",
                           true, mapped_nodes[i]);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
        
        /* see if the node is already on the list */
        for (item = opal_list_get_first(nodes); 
             item != opal_list_get_end(nodes);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*) item;
            if (0 == strcmp(node->name, mapped_nodes[i]) ||
               (0 == strcmp(node->name, orte_process_info.nodename) &&
               (0 == strcmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
                ++node->slots;
                break;
            }
        }
        
        /* If we didn't find it, add it to the list */
        
        if (item == opal_list_get_end(nodes)) {
            node = OBJ_NEW(orte_node_t);
            if (NULL == node) {
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            /* check to see if this is a local name */
            if (0 == strcmp(mapped_nodes[i], "localhost") ||
                opal_ifislocal(mapped_nodes[i])) {
                /* it is local, so use the local nodename to avoid
                 * later confusion
                 */
                if (orte_show_resolved_nodenames &&
                    0 != strcmp(mapped_nodes[i], orte_process_info.nodename)) {
                    /* add to list of aliases for this node - only add if unique */
                    opal_argv_append_unique_nosize(&node->alias, mapped_nodes[i]);
                }
                node->name = strdup(orte_process_info.nodename);
            } else {
                /* not local - use the given name */
                node->name = strdup(mapped_nodes[i]);
            }
            node->state = ORTE_NODE_STATE_UP;
            node->slots_inuse = 0;
            node->slots_max = 0;
            node->slots = 1;
            /* indicate that ORTE should override any oversubscribed conditions
             * based on local hardware limits since the user (a) might not have
             * provided us any info on the #slots for a node, and (b) the user
             * might have been wrong! If we don't check the number of local physical
             * processors, then we could be too aggressive on our sched_yield setting
             * and cause performance problems.
             */
            *override_oversubscribed = true;
            opal_list_append(nodes, &node->super);
        }
    }
    rc = ORTE_SUCCESS;

cleanup:
    if (NULL != mapped_nodes) {
        opal_argv_free(mapped_nodes);
    }

    return rc;
}
static int
orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
                                unsigned int *uMe)
{
    int             iq;
    int             ix;
    int             iFd;                    /* file descriptor for appinfo    */
    int             iTrips;                 /* counter appinfo read attempts  */
    int             max_appinfo_read_attempts;
    struct stat     ssBuf;                  /* stat buffer                    */
    size_t          szLen;                  /* size of appinfo (file)         */
    off_t           oNow;                   /* current appinfo data offset    */
    off_t           oInfo=sizeof(appInfoHdr_t);
    off_t           oDet=sizeof(appInfo_t);
    off_t           oSlots;
    off_t           oEntry;
    int32_t         sNodes=0;
    char            *cpBuf;
    char            *hostname;
    orte_node_t     *node = NULL, *n2;
    appInfoHdr_t    *apHdr;                 /* ALPS header structure          */
    appInfo_t       *apInfo;                /* ALPS table info structure      */
#if ALPS_APPINFO_VERSION==0
    placeList_t     *apSlots;               /* ALPS node specific info        */
#else
    placeNodeList_t *apNodes;
#endif
    bool            added;
    opal_list_item_t *item;

    orte_ras_alps_get_appinfo_attempts(&max_appinfo_read_attempts);
    oNow=0;
    iTrips=0;
    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: begin processing appinfo file");

    while(!oNow) {                          /* Until appinfo read is complete */
        iTrips++;                           /* Increment trip count           */

        iFd=open( filename, O_RDONLY );
        if( iFd==-1 ) {                     /* If file absent, ALPS is down   */
            opal_output_verbose(1, orte_ras_base.ras_output,
                                "ras:alps:allocate: ALPS information open failure");
            usleep(iTrips*50000);           /* Increasing delays, .05 s/try   */

            /*          Fail only when number of attempts have been exhausted.            */
            if( iTrips <= max_appinfo_read_attempts ) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
        if( fstat( iFd, &ssBuf )==-1 ) {    /* If stat fails, access denied   */

            ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE);
            return ORTE_ERR_NOT_AVAILABLE;
        }

        szLen=ssBuf.st_size;                /* Get buffer size                */
        cpBuf=malloc(szLen+1);              /* Allocate buffer                */
        if (NULL == cpBuf) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /*      Repeated attempts to read appinfo, with an increasing delay between   *
         *      successive attempts to allow scheduler I/O a chance to complete.      */
        if( (oNow=read( iFd, cpBuf, szLen ))!=(off_t)szLen ) {

            /*          This is where apstat fails; we will record it and try again.      */
            opal_output_verbose(1, orte_ras_base.ras_output,
                                "ras:alps:allocate: ALPS information read failure: %ld bytes", (long int)oNow);

            free(cpBuf);                    /* Free (old) buffer              */
            close(iFd);                     /* Close (old) descriptor         */
            oNow=0;                         /* Reset byte count               */
            usleep(iTrips*50000);           /* Increasing delays, .05 s/try   */

            /*          Fail only when number of attempts have been exhausted.            */
            if( iTrips<=max_appinfo_read_attempts ) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE);
            return ORTE_ERR_FILE_READ_FAILURE;
        }
    }
    close(iFd);
    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: file %s read", filename);

    /*  Now that we have the scheduler information, we just have to parse it for  *
     *  the data that we seek.                                                    */
    oNow=0;
    apHdr=(appInfoHdr_t *)cpBuf;

    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: %d entries in file", apHdr->apNum);

    /*  Header info (apHdr) tells us how many entries are in the file:            *
     *                                                                            *
     *      apHdr->apNum                                                          */

    for( iq=0; iq<apHdr->apNum; iq++ ) {    /*  Parse all entries in file     */

        /*      Just at this level, a lot of information is available:                *
         *                                                                            *
         *          apInfo->apid         ... ALPS job ID                              *
         *          apInfo->resId        ... ALPS reservation ID                      *
         *          apInfo->numCmds      ... Number of executables                    *
         *          apInfo->numPlaces    ... Number of PEs                            */
        apInfo=(appInfo_t *)(cpBuf+oNow+oInfo);

        /*      Calculate the dependent offsets.                                      */
        oSlots=sizeof(cmdDetail_t)*apInfo->numCmds;

        opal_output_verbose(1, orte_ras_base.ras_output,
                            "ras:alps:allocate: read data for resId %u - myId %u",
                            apInfo->resId, *uMe);


#if ALPS_APPINFO_VERSION==0

        /*      Finally, we get to the actual node-specific information:              *
         *                                                                            *
         *          apSlots[ix].cmdIx    ... index of apDet[].cmd                     *
         *          apSlots[ix].nid      ... NodeID (NID)                             *
         *          apSlots[ix].procMask ... mask for processors... need 16-bit shift */
        apSlots=(placeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
        oEntry=sizeof(placeList_t)*apInfo->numPlaces;

        oNow+=(oDet+oSlots+oEntry);         /* Target next slot               */

        if( apInfo->resId != *uMe ) continue; /* Filter to our reservation Id */

        /* in this early version of alps, there is one entry for each PE in the
         * allocation - so cycle across the numPlaces entries, assigning a slot
         * for each time a node is named
         */
        for( ix=0; ix<apInfo->numPlaces; ix++ ) {

            opal_output_verbose(5, orte_ras_base.ras_output,
                                "ras:alps:read_appinfo: got NID %d", apSlots[ix].nid);

            asprintf( &hostname, "%d", apSlots[ix].nid );
            if (NULL == hostname) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            /*          If this matches the prior nodename, just add to the slot count.   */
            if( NULL!=node && !strcmp(node->name, hostname) ) {

                free(hostname);             /* free hostname since not needed */
                ++node->slots;
            } else {                        /* must be new, so add to list    */

                opal_output_verbose(1, orte_ras_base.ras_output,
                                    "ras:alps:read_appinfo: added NID %d to list", apSlots[ix].nid);

                node = OBJ_NEW(orte_node_t);
                node->name = hostname;
                node->launch_id = apSlots[ix].nid;
                node->slots_inuse = 0;
                node->slots_max = 0;
                node->slots = 1;
                /* need to order these node ids so the regex generator
                 * can properly function
                 */
                added = false;
                for (item = opal_list_get_first(nodes);
                     item != opal_list_get_end(nodes);
                     item = opal_list_get_next(item)) {
                    n2 = (orte_node_t*)item;
                    if (node->launch_id < n2->launch_id) {
                        /* insert the new node before this one */
                        opal_list_insert_pos(nodes, item, &node->super);
                        added = true;
                        break;
                    }
                }
                if (!added) {
                    /* add it to the end */
                    opal_list_append(nodes, &node->super);
                }
                sNodes++;                   /* Increment the node count       */
            }
        }
#else
        /* in newer versions of alps, there is one entry for each node in the
         * allocation, and that struct directly carries the number of PEs
         * allocated on that node to this job.
         */
        apNodes=(placeNodeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
        oEntry=sizeof(placeNodeList_t)*apInfo->numPlaces;

        oNow+=(oDet+oSlots+oEntry);         /* Target next entry               */

        if( apInfo->resId != *uMe ) continue; /* Filter to our reservation Id */

        for( ix=0; ix<apInfo->numPlaces; ix++ ) {
            opal_output_verbose(5, orte_ras_base.ras_output,
                                "ras:alps:read_appinfo(modern): processing NID %d with %d slots",
                                apNodes[ix].nid, apNodes[ix].numPEs);
            asprintf( &hostname, "%d", apNodes[ix].nid );
            if (NULL == hostname) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            node = OBJ_NEW(orte_node_t);
            node->name = hostname;
            node->launch_id = apNodes[ix].nid;
            node->slots_inuse = 0;
            node->slots_max = 0;
            node->slots = apNodes[ix].numPEs;
            /* need to order these node ids so the regex generator
             * can properly function
             */
            added = false;
            for (item = opal_list_get_first(nodes);
                 item != opal_list_get_end(nodes);
                 item = opal_list_get_next(item)) {
                n2 = (orte_node_t*)item;
                if (node->launch_id < n2->launch_id) {
                    /* insert the new node before this one */
                    opal_list_insert_pos(nodes, item, &node->super);
                    added = true;
                    break;
                }
            }
            if (!added) {
                /* add it to the end */
                opal_list_append(nodes, &node->super);
            }
            sNodes++;                   /* Increment the node count       */
        }
#endif
        break;                              /* Extended details ignored       */
    }
    free(cpBuf);                            /* Free the buffer                */

    return ORTE_SUCCESS;
}
Example #8
0
int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
                           size_t nprocs,
                           struct opal_proc_t **procs,
                           struct mca_btl_base_endpoint_t** peers,
                           opal_bitmap_t* reachable )
{
    mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*)btl;
    const opal_proc_t* my_proc; /* pointer to caller's proc structure */
    int i, rc;

    /* get pointer to my proc structure */
    if( NULL == (my_proc = opal_proc_local_get()) )
        return OPAL_ERR_OUT_OF_RESOURCE;

    for(i = 0; i < (int) nprocs; i++) {

        struct opal_proc_t* opal_proc = procs[i];
        mca_btl_tcp_proc_t* tcp_proc;
        mca_btl_base_endpoint_t* tcp_endpoint;
        bool existing_found = false;

        /* Do not create loopback TCP connections */
        if( my_proc == opal_proc ) {
            continue;
        }

        if(NULL == (tcp_proc = mca_btl_tcp_proc_create(opal_proc))) {
            continue;
        }

        /*
         * Check to make sure that the peer has at least as many interface
         * addresses exported as we are trying to use. If not, then
         * don't bind this BTL instance to the proc.
         */

        OPAL_THREAD_LOCK(&tcp_proc->proc_lock);

        for (uint32_t j = 0 ; j < (uint32_t)tcp_proc->proc_endpoint_count ; ++j) {
            tcp_endpoint = tcp_proc->proc_endpoints[j];
            if (tcp_endpoint->endpoint_btl == tcp_btl) {
                existing_found = true;
                break;
            }
        }

        if (!existing_found) {
            /* The btl_proc datastructure is shared by all TCP BTL
             * instances that are trying to reach this destination.
             * Cache the peer instance on the btl_proc.
             */
            tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
            if(NULL == tcp_endpoint) {
                OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
                return OPAL_ERR_OUT_OF_RESOURCE;
            }

            tcp_endpoint->endpoint_btl = tcp_btl;
            rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
            if(rc != OPAL_SUCCESS) {
                OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
                OBJ_RELEASE(tcp_endpoint);
                continue;
            }

            OPAL_THREAD_LOCK(&tcp_btl->tcp_endpoints_mutex);
            opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
            OPAL_THREAD_UNLOCK(&tcp_btl->tcp_endpoints_mutex);
        }

        OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);

        if (NULL != reachable) {
            opal_bitmap_set_bit(reachable, i);
        }

        peers[i] = tcp_endpoint;

        /* we increase the count of MPI users of the event library
           once per peer, so that we are used until we aren't
           connected to a peer */
        opal_progress_event_users_increment();
    }

    return OPAL_SUCCESS;
}
static void 
rml_oob_queued_progress(int fd, short event, void *arg)
{
    orte_rml_oob_queued_msg_t *qmsg;
    orte_rml_oob_msg_header_t *hdr;
    int real_tag;
    int ret;
    orte_process_name_t next, origin;

    while (true) {
        OPAL_THREAD_LOCK(&orte_rml_oob_module.queued_lock);
        qmsg = (orte_rml_oob_queued_msg_t*) opal_list_remove_first(&orte_rml_oob_module.queued_routing_messages);
        OPAL_THREAD_UNLOCK(&orte_rml_oob_module.queued_lock);
        if (NULL == qmsg) break;

        hdr = (orte_rml_oob_msg_header_t*) qmsg->payload[0].iov_base;
        origin = hdr->origin;

        next = orte_routed.get_route(&hdr->destination);
        if (next.vpid == ORTE_VPID_INVALID) {
            opal_output(0,
                        "%s:queued progress tried routing message from %s to %s:%d, can't find route",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&hdr->origin),
                        ORTE_NAME_PRINT(&hdr->destination),
                        hdr->tag);
            opal_backtrace_print(stderr);
            orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
        }

        if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, ORTE_PROC_MY_NAME)) {
            opal_output(0, "%s:queued progress trying to get message from %s to %s:%d, routing loop",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&hdr->origin),
                        ORTE_NAME_PRINT(&hdr->destination),
                        hdr->tag);
            opal_backtrace_print(stderr);
            orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
        }

        if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, &hdr->destination)) {
            real_tag = hdr->tag;
        } else {
            real_tag = ORTE_RML_TAG_RML_ROUTE;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
                             "%s routing message from %s for %s to %s (tag: %d)",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&hdr->origin),
                             ORTE_NAME_PRINT(&hdr->destination),
                             ORTE_NAME_PRINT(&next),
                             hdr->tag));

        ORTE_RML_OOB_MSG_HEADER_HTON(*hdr);

        ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
                                                          &origin,
                                                          qmsg->payload,
                                                          1,
                                                          real_tag,
                                                          0,
                                                          rml_oob_recv_route_queued_send_callback,
                                                          qmsg);

        if (ORTE_SUCCESS != ret) {
            if (ORTE_ERR_ADDRESSEE_UNKNOWN == ret) {
                /* still no route -- try again */
                ORTE_RML_OOB_MSG_HEADER_NTOH(*hdr);
                OPAL_THREAD_LOCK(&orte_rml_oob_module.queued_lock);
                opal_list_append(&orte_rml_oob_module.queued_routing_messages,
                                 &qmsg->super);
                if (1 == opal_list_get_size(&orte_rml_oob_module.queued_routing_messages)) {
                    opal_event_evtimer_add(orte_rml_oob_module.timer_event,
                                           &orte_rml_oob_module.timeout);
                }
                OPAL_THREAD_UNLOCK(&orte_rml_oob_module.queued_lock);
            } else {
                opal_output(0,
                            "%s failed to send message from %s to %s:%d %s (rc = %d)",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&next),
                            ORTE_NAME_PRINT(&origin),
                            real_tag,
                            ORTE_ERROR_NAME(ret),
                            ret);
                abort();
            }
        }
    }
}
Example #10
0
/*
 * If we have a valid section, see if we have a matching section
 * somewhere (i.e., same vendor ID and vendor part ID).  If we do,
 * update the values.  If not, save the values in a new instance and
 * add it to the list.
 */
static int save_section(parsed_section_values_t *s)
{
    int i, j;
    opal_list_item_t *item;
    device_values_t *h;
    bool found;

    /* Is the parsed section valid? */
    if (NULL == s->name || 0 == s->vendor_ids_len ||
            0 == s->vendor_part_ids_len) {
        return OMPI_ERR_BAD_PARAM;
    }

    /* Iterate over each of the vendor/part IDs in the parsed
       values */
    for (i = 0; i < s->vendor_ids_len; ++i) {
        for (j = 0; j < s->vendor_part_ids_len; ++j) {
            found = false;

            /* Iterate over all the saved devices */
            for (item = opal_list_get_first(&devices);
                    item != opal_list_get_end(&devices);
                    item = opal_list_get_next(item)) {
                h = (device_values_t*) item;
                if (s->vendor_ids[i] == h->vendor_id &&
                        s->vendor_part_ids[j] == h->vendor_part_id) {
                    /* Found a match.  Update any newly-set values. */
                    if (s->values.mtu_set) {
                        h->values.mtu = s->values.mtu;
                        h->values.mtu_set = true;
                    }

                    if (s->values.use_eager_rdma_set) {
                        h->values.use_eager_rdma = s->values.use_eager_rdma;
                        h->values.use_eager_rdma_set = true;
                    }

                    if (NULL != s->values.receive_queues) {
                        h->values.receive_queues =
                            strdup(s->values.receive_queues);
                    }

                    if (s->values.max_inline_data_set) {
                        h->values.max_inline_data = s->values.max_inline_data;
                        h->values.max_inline_data_set = true;
                    }

                    if (s->values.rdmacm_reject_causes_connect_error_set) {
                        h->values.rdmacm_reject_causes_connect_error =
                            s->values.rdmacm_reject_causes_connect_error;
                        h->values.rdmacm_reject_causes_connect_error_set =
                            true;
                    }

                    found = true;
                    break;
                }
            }

            /* Did we find/update it in the exising list?  If not,
               create a new one. */
            if (!found) {
                h = OBJ_NEW(device_values_t);
                h->section_name = strdup(s->name);
                h->vendor_id = s->vendor_ids[i];
                h->vendor_part_id = s->vendor_part_ids[j];
                /* NOTE: There is a bug in the PGI 6.2 series that
                   causes the compiler to choke when copying structs
                   containing bool members by value.  So do a memcpy
                   here instead. */
                memcpy(&h->values, &s->values, sizeof(s->values));
                /* Need to strdup the string, though */
                if (NULL != h->values.receive_queues) {
                    h->values.receive_queues = strdup(s->values.receive_queues);
                }
                opal_list_append(&devices, &h->super);
            }
        }
    }

    /* All done */

    return OMPI_SUCCESS;
}
Example #11
0
/*****************
 * Listener Callbacks
 *****************/
static void errmgr_base_tool_cmdline_recv(int status,
                                         orte_process_name_t* sender,
                                         opal_buffer_t* buffer,
                                         orte_rml_tag_t tag,
                                         void* cbdata)
{
    int ret;
    orte_process_name_t swap_dest;
    orte_errmgr_tool_cmd_flag_t command;
    orte_std_cntr_t count = 1;
    char *off_nodes  = NULL;
    char *off_procs  = NULL;
    char *onto_nodes = NULL;
    char **split_off_nodes  = NULL;
    char **split_off_procs  = NULL;
    char **split_onto_nodes = NULL;
    opal_list_t *proc_list = NULL;
    opal_list_t *node_list = NULL;
    opal_list_t *suggested_map_list = NULL;
    orte_errmgr_predicted_proc_t *off_proc = NULL;
    orte_errmgr_predicted_node_t *off_node = NULL;
    orte_errmgr_predicted_map_t  *onto_map = NULL;
    int cnt = 0, i;


    if( ORTE_RML_TAG_MIGRATE != tag ) {
        opal_output(orte_errmgr_base_framework.framework_output,
                    "errmgr:base:tool:recv() Error: Unknown tag: Received a command message from %s (tag = %d).",
                    ORTE_NAME_PRINT(sender), tag);
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return;
    }

    OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output,
                         "errmgr:base:tool:recv() Command Line: Start a migration operation [Sender = %s]",
                         ORTE_NAME_PRINT(sender)));

    errmgr_cmdline_recv_issued = false; /* Not a persistent RML message */

    /*
     * If we are already interacting with a command line tool then reject this
     * request. Since we only allow the processing of one tool command at a
     * time.
     */
    if( OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, &errmgr_cmdline_sender) ) {
        swap_dest.jobid = errmgr_cmdline_sender.jobid;
        swap_dest.vpid  = errmgr_cmdline_sender.vpid;

        errmgr_cmdline_sender = *sender;
        orte_errmgr_base_migrate_update(ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS);

        errmgr_cmdline_sender.jobid = swap_dest.jobid;
        errmgr_cmdline_sender.vpid  = swap_dest.vpid;

        return;
    }

    errmgr_cmdline_sender = *sender;

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_ERRMGR_MIGRATE_TOOL_CMD))) {
        ORTE_ERROR_LOG(ret);
        return;
    }

    /*
     * orte-migrate has requested that a checkpoint be taken
     */
    if (ORTE_ERRMGR_MIGRATE_TOOL_INIT_CMD == command) {
        OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output,
                             "errmgr:base:tool:recv() Command line requested process migration [command %d]\n",
                             command));

        /*
         * Unpack the buffer from the orte-migrate command
         */
        count = 1;
        if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(off_procs), &count, OPAL_STRING))) {
            ORTE_ERROR_LOG(ret);
	    return;
        }

        if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(off_nodes), &count, OPAL_STRING))) {
            ORTE_ERROR_LOG(ret);
            return;
        }

        if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(onto_nodes), &count, OPAL_STRING))) {
            ORTE_ERROR_LOG(ret);
            return;
        }

        /*
         * Parse the comma separated list
         */
        proc_list = OBJ_NEW(opal_list_t);
        node_list = OBJ_NEW(opal_list_t);
        suggested_map_list = OBJ_NEW(opal_list_t);

        split_off_procs     = opal_argv_split(off_procs,  ',');
        cnt = opal_argv_count(split_off_procs);
        if( cnt > 0 ) {
            for(i = 0; i < cnt; ++i) {
                off_proc = OBJ_NEW(orte_errmgr_predicted_proc_t);
                off_proc->proc_name.vpid = atoi(split_off_procs[i]);
                opal_list_append(proc_list, &(off_proc->super));
            }
        }

        split_off_nodes     = opal_argv_split(off_nodes,  ',');
        cnt = opal_argv_count(split_off_nodes);
        if( cnt > 0 ) {
            for(i = 0; i < cnt; ++i) {
                off_node = OBJ_NEW(orte_errmgr_predicted_node_t);
                off_node->node_name = strdup(split_off_nodes[i]);
                opal_list_append(node_list, &(off_node->super));
            }
        }

        split_onto_nodes    = opal_argv_split(onto_nodes, ',');
        cnt = opal_argv_count(split_onto_nodes);
        if( cnt > 0 ) {
            for(i = 0; i < cnt; ++i) {
                onto_map = OBJ_NEW(orte_errmgr_predicted_map_t);
                onto_map->map_node_name = strdup(split_onto_nodes[i]);
                opal_list_append(suggested_map_list, &(onto_map->super));
            }
        }

        /*
         * Pass to the predicted fault function to see how they would like to progress
         */
        orte_errmgr.predicted_fault(proc_list, node_list, suggested_map_list);
    }
    /*
     * Unknown command
     */
    else {
        OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output,
                             "errmgr:base:tool:recv() Command line sent an unknown command (command %d)\n",
                             command));
        ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
    }

    return;
}
Example #12
0
/* configure using getifaddrs(3) */
static int if_bsdx_open(void)
{
    struct ifaddrs **ifadd_list;
    struct ifaddrs *cur_ifaddrs;
    struct sockaddr_in* sin_addr;

    /* 
     * the manpage claims that getifaddrs() allocates the memory,
     * and freeifaddrs() is later used to release the allocated memory.
     * however, without this malloc the call to getifaddrs() segfaults
     */
    ifadd_list = (struct ifaddrs **) malloc(sizeof(struct ifaddrs*));

    /* create the linked list of ifaddrs structs */
    if (getifaddrs(ifadd_list) < 0) {
        opal_output(0, "opal_ifinit: getifaddrs() failed with error=%d\n",
                    errno);
        return OPAL_ERROR;
    }

    for (cur_ifaddrs = *ifadd_list; NULL != cur_ifaddrs; 
         cur_ifaddrs = cur_ifaddrs->ifa_next) {
        opal_if_t *intf;
        struct in_addr a4;

        /* skip non- af_inet interface addresses */
        if (AF_INET != cur_ifaddrs->ifa_addr->sa_family) {
            continue;
        }

        /* skip interface if it is down (IFF_UP not set) */
        if (0 == (cur_ifaddrs->ifa_flags & IFF_UP)) {
            continue;
        }

        /* skip interface if it is a loopback device (IFF_LOOPBACK set) */
        if (!opal_if_retain_loopback && 0 != (cur_ifaddrs->ifa_flags & IFF_LOOPBACK)) {
            continue;
        }

        /* or if it is a point-to-point interface */
        /* TODO: do we really skip p2p? */
        if (0 != (cur_ifaddrs->ifa_flags & IFF_POINTOPOINT)) {
            continue;
        }

        sin_addr = (struct sockaddr_in *) cur_ifaddrs->ifa_addr;

        intf = OBJ_NEW(opal_if_t);
        if (NULL == intf) {
            opal_output(0, "opal_ifinit: unable to allocate %d bytes\n",
                        (int) sizeof(opal_if_t));
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        /* fill values into the opal_if_t */
        memcpy(&a4, &(sin_addr->sin_addr), sizeof(struct in_addr));
            
        strncpy(intf->if_name, cur_ifaddrs->ifa_name, IF_NAMESIZE);
        intf->if_index = opal_list_get_size(&opal_if_list) + 1;
        ((struct sockaddr_in*) &intf->if_addr)->sin_addr = a4;
        ((struct sockaddr_in*) &intf->if_addr)->sin_family = AF_INET;
        ((struct sockaddr_in*) &intf->if_addr)->sin_len =  cur_ifaddrs->ifa_addr->sa_len;

        intf->if_mask = prefix( sin_addr->sin_addr.s_addr);
        intf->if_flags = cur_ifaddrs->ifa_flags;

        intf->if_kernel_index = 
            (uint16_t) if_nametoindex(cur_ifaddrs->ifa_name);

        opal_list_append(&opal_if_list, &(intf->super));
    }   /*  of for loop over ifaddrs list */

    return OPAL_SUCCESS;
}
Example #13
0
static int orte_rds_hostfile_query(orte_jobid_t job)
{
    opal_list_t existing;
    opal_list_t updates, rds_updates;
    opal_list_item_t *item;
    orte_rds_cell_desc_t *rds_item;
    orte_rds_cell_attr_t *new_attr;
    orte_ras_node_t *ras_item;
    int rc;

    if (orte_rds_hostfile_queried) {
        /* if we have already been queried, then
         * our info is on the registry, so just
         * return. Note that this restriction
         * may eventually be lifted - ideally, 
         * we might check to see if this is a
         * new file name and go ahead with the
         * query if so.
         */
        return ORTE_SUCCESS;
    }
    orte_rds_hostfile_queried = true;
    
    OBJ_CONSTRUCT(&existing, opal_list_t);
    OBJ_CONSTRUCT(&updates, opal_list_t);
    OBJ_CONSTRUCT(&rds_updates, opal_list_t);
    rc = orte_ras_base_node_query(&existing);
    if(ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    rc = mca_base_param_find("rds", "hostfile", "path");
    mca_base_param_lookup_string(rc, &mca_rds_hostfile_component.path);

    rc = orte_rds_hostfile_parse(mca_rds_hostfile_component.path, &existing, &updates);
    if (ORTE_ERR_NOT_FOUND == rc) {
        if(mca_rds_hostfile_component.default_hostfile) {
            rc = ORTE_SUCCESS;
        } else {
            opal_show_help("help-rds-hostfile.txt", "rds:no-hostfile",
                           true,
                           mca_rds_hostfile_component.path);
        }
        goto cleanup;
    } else if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    if ( !opal_list_is_empty(&updates) ) {

        /* Convert RAS update list to RDS update list */
        for ( ras_item  = (orte_ras_node_t*)opal_list_get_first(&updates);
              ras_item != (orte_ras_node_t*)opal_list_get_end(&updates);
              ras_item  = (orte_ras_node_t*)opal_list_get_next(ras_item)) {

            rds_item = OBJ_NEW(orte_rds_cell_desc_t);
            if (NULL == rds_item) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            rds_item->site  = strdup("Hostfile");
            rds_item->name  = strdup(ras_item->node_name);
            if (need_cellid) {
#if 0 /* JJH Repair when cellid's are fixed */
                /* Create a new cellid for this hostfile */
                rc = orte_ns.create_cellid(&local_cellid, rds_item->site, rds_item->name);
                if (ORTE_SUCCESS != rc) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
#endif
                local_cellid = 0;
                need_cellid = false;
            }

            rds_item->cellid      = local_cellid;
            ras_item->node_cellid = local_cellid;

            new_attr = OBJ_NEW(orte_rds_cell_attr_t);
            if (NULL == new_attr) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            new_attr->keyval.key          = strdup(ORTE_RDS_NAME);
            new_attr->keyval.value = OBJ_NEW(orte_data_value_t);
            if (NULL == new_attr->keyval.value) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            new_attr->keyval.value->type   = ORTE_STRING;
            new_attr->keyval.value->data   = strdup(ras_item->node_name);
            opal_list_append(&(rds_item->attributes), &new_attr->super);

            new_attr = OBJ_NEW(orte_rds_cell_attr_t);
            if (NULL == new_attr) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            new_attr->keyval.key          = strdup(ORTE_CELLID_KEY);
            new_attr->keyval.value = OBJ_NEW(orte_data_value_t);
            if (NULL == new_attr->keyval.value) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            new_attr->keyval.value->type   = ORTE_CELLID;
            if (ORTE_SUCCESS != (rc = orte_dss.copy(&(new_attr->keyval.value->data), &(rds_item->cellid), ORTE_CELLID))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            opal_list_append(&(rds_item->attributes), &new_attr->super);

            opal_list_append(&rds_updates, &rds_item->super);
        }

        /* Insert the new node into the RDS */
        rc = orte_rds.store_resource(&rds_updates);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }

        /* Then the RAS, since we can assume that any
         * resources listed in the hostfile have been
         * already allocated for our use.
         */
        rc = orte_ras_base_node_insert(&updates);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
        
        /* and now, indicate that ORTE should override any oversubscribed conditions
         * based on local hardware limits since the user (a) might not have
         * provided us any info on the #slots for a node, and (b) the user
         * might have been wrong! If we don't check the number of local physical
         * processors, then we could be too aggressive on our sched_yield setting
         * and cause performance problems.
         */
        rc = orte_ras_base_set_oversubscribe_override(job);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
    }

cleanup:
    if (NULL != mca_rds_hostfile_component.path) {
        free(mca_rds_hostfile_component.path);
        mca_rds_hostfile_component.path = NULL;
    }

    while(NULL != (item = opal_list_remove_first(&existing))) {
        OBJ_RELEASE(item);
    }

    while(NULL != (item = opal_list_remove_first(&updates))) {
        OBJ_RELEASE(item);
    }

    while (NULL != (rds_item = (orte_rds_cell_desc_t*)opal_list_remove_first(&rds_updates))) {
        while (NULL != (new_attr = (orte_rds_cell_attr_t*)opal_list_remove_first(&(rds_item->attributes)))) {
            OBJ_RELEASE(new_attr);
        }
        OBJ_RELEASE(rds_item);
    }

    OBJ_DESTRUCT(&existing);
    OBJ_DESTRUCT(&updates);
    OBJ_DESTRUCT(&rds_updates);

    return rc;
}
Example #14
0
static int orte_rds_hostfile_parse_line(int token, opal_list_t* existing, opal_list_t* updates)
{
    char buff[64];
    int rc;
    orte_ras_node_t* node;
    bool update = false;
    bool got_count = false;
    bool got_max = false;

    if (ORTE_RDS_HOSTFILE_STRING == token ||
        ORTE_RDS_HOSTFILE_HOSTNAME == token ||
        ORTE_RDS_HOSTFILE_INT == token ||
        ORTE_RDS_HOSTFILE_IPV4 == token) {
        char* value;
        char** argv;
        char* node_name = NULL;
        char* username = NULL;
        int cnt;

        if(ORTE_RDS_HOSTFILE_INT == token) {
            sprintf(buff,"%d", orte_rds_hostfile_value.ival);
            value = buff;
        } else {
            value = orte_rds_hostfile_value.sval;
        }
        argv = opal_argv_split (value, '@');

        cnt = opal_argv_count (argv);
        if (1 == cnt) {
            node_name = strdup(argv[0]);
        } else if (2 == cnt) {
            username = strdup(argv[0]);
            node_name = strdup(argv[1]);
        } else {
            opal_output(0, "WARNING: Unhandeled user@host-combination\n"); /* XXX */
        }
        opal_argv_free (argv);

        /* convert this into something globally unique */
        if(strcmp(node_name, "localhost") == 0) {
            /* Nodename has been allocated, that is for sure */
            free (node_name);
            node_name = strdup(orte_system_info.nodename);
        }

        /* Do we need to make a new node object?  First check to see
           if it's in the existing list. */

        if (NULL == (node = orte_rds_hostfile_lookup(existing, node_name))) {

            /* If it wasn't, see if it's already in the updates list */

            if (NULL == (node = orte_rds_hostfile_lookup(updates,
                                                         node_name))) {
                node = OBJ_NEW(orte_ras_node_t);
                node->node_name = node_name;
                node->node_username = username;
                node->node_slots = 0;

#if 0
                /* get a new cellid for this node */
                /* JMS Temporarily turned off until cell IDs are
                   properly handled elsewhere in the code */
                /* JJH This assumes that each hostname listed should be
                   placed in a new cell. Is this accurate to the design?
                */
                if (ORTE_SUCCESS !=
                    (rc = orte_ns.create_cellid(&(node->node_cellid),
                                                "UNKNOWN-SITE",
                                                node->node_name))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
#endif
            }

            /* Note that we need to set update to true regardless of
               whether the node was found on the updates list or not.
               If it was found, we just removed it (in
               orte_rds_hostfile_lookup()), so the update puts it back
               (potentially after updating it, of course).  If it was
               not found, then we have a new node instance that needs
               to be added to the updates list. */

            update = true;
        }
        else {
            /* If it was in the existing list, then we can use its cellid
             * to add the reset of the hosts in the file to. */
            local_cellid = node->node_cellid;
            need_cellid = false;
        }
    } else {
        orte_rds_hostfile_parse_error(token);
        return ORTE_ERROR;
    }

    got_count = false;
    while (!orte_rds_hostfile_done) {
        token = orte_rds_hostfile_lex();

        switch (token) {
        case ORTE_RDS_HOSTFILE_DONE:
            goto done;

        case ORTE_RDS_HOSTFILE_NEWLINE:
            goto done;

        case ORTE_RDS_HOSTFILE_USERNAME:
            node->node_username = orte_rds_hostfile_parse_string();
            break;

        case ORTE_RDS_HOSTFILE_COUNT:
        case ORTE_RDS_HOSTFILE_CPU:
        case ORTE_RDS_HOSTFILE_SLOTS:
            rc = orte_rds_hostfile_parse_int();
            if (rc < 0) {
                opal_show_help("help-rds-hostfile.txt", "rds:slots",
                               true,
                               cur_hostfile_name, rc);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            node->node_slots += rc;
            update = true;
            got_count = true;

            /* Ensure that node_slots_max >= node_slots */
            if (node->node_slots_max != 0 && node->node_slots_max < node->node_slots) {
                node->node_slots_max = node->node_slots;
            }
            break;

        case ORTE_RDS_HOSTFILE_SLOTS_MAX:
            rc = orte_rds_hostfile_parse_int();
            if (rc < 0) {
                opal_show_help("help-rds-hostfile.txt", "rds:max_slots",
                               true,
                               cur_hostfile_name, ((size_t) rc));
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            /* Only take this update if it puts us >= node_slots */
            if (rc >= node->node_slots) {
                if (node->node_slots_max != rc) {
                    node->node_slots_max = rc;
                    update = true;
                    got_max = true;
                }
            } else {
                opal_show_help("help-rds-hostfile.txt", "rds:max_slots_lt",
                               true,
                               cur_hostfile_name, node->node_slots, rc);
                ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            break;

        default:
            orte_rds_hostfile_parse_error(token);
            OBJ_RELEASE(node);
            return ORTE_ERROR;
        }
    }

done:
    if (update) {
        if (!got_count) {
            if (got_max) {
                node->node_slots = node->node_slots_max;
            } else {
                ++node->node_slots;
            }
        }
        opal_list_append(updates, &node->super);
    } else {
        OBJ_RELEASE(node);
    }
    return ORTE_SUCCESS;
}
Example #15
0
/*
 * Start monitoring of local processes
 */
static void start(orte_jobid_t jobid)
{
    mca_base_component_t *c = &mca_sensor_file_component.super.base_version;
    opal_list_item_t *item;
    orte_odls_job_t *jobdat;
    orte_app_context_t *app, *aptr;
    int rc, tmp;
    char *filename;
    file_tracker_t *ft;

    /* cannot monitor my own job */
    if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) {
        return;
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                         "%s starting file monitoring for job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jobid)));
    
    /* get the local jobdat for this job */
    for (item = opal_list_get_first(&orte_local_jobdata);
         item != opal_list_get_end(&orte_local_jobdata);
         item = opal_list_get_end(&orte_local_jobdata)) {
        jobdat = (orte_odls_job_t*)item;
        if (jobid == jobdat->jobid || ORTE_JOBID_WILDCARD == jobid) {
            /* must be at least one app_context, so use the first one found */
            app = NULL;
            for (tmp=0; tmp < jobdat->apps.size; tmp++) {
                if (NULL != (aptr = (orte_app_context_t*)opal_pointer_array_get_item(&jobdat->apps, tmp))) {
                    app = aptr;
                    break;
                }
            }
            if (NULL == app) {
                /* got a problem */
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                continue;
            }
            
            /* search the environ to get the filename */
            if (ORTE_SUCCESS != (rc = mca_base_param_find_string(c, "filename", app->env, &filename))) {
                /* was a default file given */
                if (NULL == mca_sensor_file_component.file) {
                    /* can't do anything without a file */
                    OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                         "%s sensor:file no file for job %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         ORTE_JOBID_PRINT(jobid)));
                    continue;
                }
                filename = mca_sensor_file_component.file;
            }
            
            /* create the tracking object */
            ft = OBJ_NEW(file_tracker_t);
            ft->jobid = jobid;
            ft->file = strdup(filename);
            
            /* search the environ to see what we are checking */
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_size", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_size) {
                    ft->check_size = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_size);
                }
            } else {
                ft->check_size = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_access", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_access) {
                    ft->check_access = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_access);
                }
            } else {
                ft->check_access = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "check_mod", app->env, &tmp))) {
                /* was a default value given */
                if (0 < mca_sensor_file_component.check_mod) {
                    ft->check_mod = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_mod);
                }
            } else {
                ft->check_mod = OPAL_INT_TO_BOOL(tmp);
            }
            tmp = 0;
            if (ORTE_SUCCESS != (rc = mca_base_param_find_int(c, "limit", app->env, &tmp))) {
                ft->limit = mca_sensor_file_component.limit;
            } else {
                ft->limit = tmp;
            }
            opal_list_append(&jobs, &ft->super);
            OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
                                 "%s file %s monitored for %s%s%s with limit %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ft->file, ft->check_size ? "SIZE:" : " ",
                                 ft->check_access ? "ACCESS TIME:" : " ",
                                 ft->check_mod ? "MOD TIME" : " ", ft->limit));
        }
    }
    
    /* start sampling */
    if (NULL == sample_ev && !opal_list_is_empty(&jobs)) {
        /* startup a timer to wake us up periodically
         * for a data sample
         */
        sample_ev =  (opal_event_t *) malloc(sizeof(opal_event_t));
        opal_event_evtimer_set(opal_event_base, sample_ev, sample, sample_ev);
        sample_time.tv_sec = mca_sensor_file_component.sample_rate;
        sample_time.tv_usec = 0;
        opal_event_evtimer_add(sample_ev, &sample_time);
    }
    return;
}
static void
rml_oob_recv_route_callback(int status,
                            struct orte_process_name_t* peer,
                            struct iovec* iov,
                            int count,
                            orte_rml_tag_t tag,
                            void *cbdata)
{
    orte_rml_oob_msg_header_t *hdr = 
        (orte_rml_oob_msg_header_t*) iov[0].iov_base;
    int real_tag;
    int ret;
    orte_process_name_t next, origin;
    struct iovec *new_iov;

    /* BWB -- propogate errors here... */
    assert(status >= 0);

    ORTE_RML_OOB_MSG_HEADER_NTOH(*hdr);

    origin = hdr->origin;

    next = orte_routed.get_route(&hdr->destination);
    if (next.vpid == ORTE_VPID_INVALID) {
        opal_output(0, "%s:route_callback tried routing message from %s to %s:%d, can't find route",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&origin),
                    ORTE_NAME_PRINT(&hdr->destination),
                    hdr->tag);
        opal_backtrace_print(stderr);
        orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
        return;
    }

    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, ORTE_PROC_MY_NAME)) {
        opal_output(0, "%s:route_callback trying to get message from %s to %s:%d, routing loop",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&origin),
                    ORTE_NAME_PRINT(&hdr->destination),
                    hdr->tag);
        opal_backtrace_print(stderr);
        orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
    }

    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, &hdr->destination)) {
        real_tag = hdr->tag;
    } else {
        real_tag = ORTE_RML_TAG_RML_ROUTE;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
                         "%s routing message from %s for %s to %s (tag: %d)",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(&hdr->origin),
                         ORTE_NAME_PRINT(&hdr->destination),
                         ORTE_NAME_PRINT(&next),
                         hdr->tag));

    ORTE_RML_OOB_MSG_HEADER_HTON(*hdr);

    /* NTH: fix potential race condition. oob may modify iov before
       the oob send completes */
    new_iov = (struct iovec*) calloc (count, sizeof (struct iovec));
    if (NULL == new_iov) {
	opal_output (0, "%s:route_callback malloc error!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        opal_backtrace_print(stderr);
        orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);    
    }

    memcpy (new_iov, iov, count * sizeof (struct iovec));

    ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
                                                      &origin,
                                                      new_iov,
                                                      count,
                                                      real_tag,
                                                      0,
                                                      rml_oob_recv_route_send_callback,
                                                      NULL);

    if (ORTE_SUCCESS != ret) {
        if (ORTE_ERR_ADDRESSEE_UNKNOWN == ret) {
            /* no route -- queue and hope we find a route */
            orte_rml_oob_queued_msg_t *qmsg = OBJ_NEW(orte_rml_oob_queued_msg_t);
            OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
                                 "%s: no OOB information for %s.  Queuing for later.",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&next)));
            ORTE_RML_OOB_MSG_HEADER_NTOH(*hdr);
            qmsg->payload[0].iov_base = (IOVBASE_TYPE*) malloc(iov[0].iov_len);
            if (NULL == qmsg->payload[0].iov_base) abort();
            qmsg->payload[0].iov_len = iov[0].iov_len;
            memcpy(qmsg->payload[0].iov_base, iov[0].iov_base, iov[0].iov_len);
            OPAL_THREAD_LOCK(&orte_rml_oob_module.queued_lock);
            opal_list_append(&orte_rml_oob_module.queued_routing_messages,
                             &qmsg->super);
            if (1 == opal_list_get_size(&orte_rml_oob_module.queued_routing_messages)) {
                opal_event_evtimer_add(orte_rml_oob_module.timer_event,
                                       &orte_rml_oob_module.timeout);
            }
            OPAL_THREAD_UNLOCK(&orte_rml_oob_module.queued_lock);
        } else {
            opal_output(0,
                        "%s failed to send message to %s: %s (rc = %d)",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&next),
                        opal_strerror(ret),
                        ret);
            orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
        }
    }
}
Example #17
0
/* the -host option can always be used in both absolute
 * and relative mode, so we have to check for pre-existing
 * allocations if we are to use relative node syntax
 */
int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
                                     char** host_argv)
{
    opal_list_item_t* item;
    bool found;
    opal_list_item_t *next;
    orte_std_cntr_t i, j, k, len_mapped_node=0;
    int rc;
    char **mapped_nodes = NULL, **mini_map, *cptr;
    orte_node_t *node, **nodepool;
    int nodeidx;
    int num_empty=0;
    opal_list_t keep;
    bool want_all_empty = false;
    
    /* if the incoming node list is empty, then there
     * is nothing to filter!
     */
    if (opal_list_is_empty(nodes)) {
        return ORTE_SUCCESS;
    }

    /* setup for relative node syntax */
    nodepool = (orte_node_t**)orte_node_pool->addr;
    
    /* Accumulate all of the host name mappings */
    for (j = 0; j < opal_argv_count(host_argv); ++j) {
        mini_map = opal_argv_split(host_argv[j], ',');
        
        for (k = 0; NULL != mini_map[k]; ++k) {
            if ('+' == mini_map[k][0]) {
                /* see if we specified empty nodes */
                if ('e' == mini_map[k][1] ||
                    'E' == mini_map[k][1]) {
                    /* request for empty nodes - do they want
                     * all of them?
                     */
                    if (NULL != (cptr = strchr(mini_map[k], ':'))) {
                        /* the colon indicates a specific # are requested */
                        cptr++; /* step past : */
                        /* put a marker into the list */
                        cptr--;
                        *cptr = '*';
                        opal_argv_append_nosize(&mapped_nodes, cptr);
                    } else {
                        /* add a marker to the list */
                        opal_argv_append_nosize(&mapped_nodes, "*");
                        want_all_empty = true;
                    }
                } else if ('n' == mini_map[k][1] ||
                           'N' == mini_map[k][1]) {
                    /* they want a specific relative node #, so
                     * look it up on global pool
                     */
                    nodeidx = strtol(&mini_map[k][2], NULL, 10);
                    if (nodeidx < 0 ||
                        nodeidx > (int)orte_node_pool->size) {
                        /* this is an error */
                        orte_show_help("help-dash-host.txt", "dash-host:relative-node-out-of-bounds",
                                       true, nodeidx, mini_map[k]);
                        rc = ORTE_ERR_SILENT;
                        goto cleanup;
                    }
                    /* if the HNP is not allocated, then we need to
                     * adjust the index as the node pool is offset
                     * by one
                     */
                    if (!orte_hnp_is_allocated) {
                        nodeidx++;
                    }
                    /* see if that location is filled */
                    
                    if (NULL == nodepool[nodeidx]) {
                        /* this is an error */
                        orte_show_help("help-dash-host.txt", "dash-host:relative-node-not-found",
                                       true, nodeidx, mini_map[k]);
                        rc = ORTE_ERR_SILENT;
                        goto cleanup;
                    }
                    /* add this node to the list */
                    opal_argv_append_nosize(&mapped_nodes, nodepool[nodeidx]->name);
                } else {
                    /* invalid relative node syntax */
                    orte_show_help("help-dash-host.txt", "dash-host:invalid-relative-node-syntax",
                                   true, mini_map[k]);
                    rc = ORTE_ERR_SILENT;
                    goto cleanup;
                }
            } else { /* non-relative syntax - add to list */
                if (OPAL_SUCCESS != (rc = opal_argv_append_nosize(&mapped_nodes, 
                                                                  mini_map[k]))) {
                    goto cleanup;
                }
            }
        }
        opal_argv_free(mini_map);
    }
    
    /* Did we find anything? If not, then do nothing */
    if (NULL == mapped_nodes && 0 == num_empty) {
        return ORTE_SUCCESS;
    }
    
    /* we found some info - filter what is on the list...
     * i.e., go through the list and remove any nodes that
     * were -not- included on the -host list.
     *
     * NOTE: The following logic is based on knowing that
     * any node can only be included on the incoming
     * nodes list ONCE.
     */
    
    len_mapped_node = opal_argv_count(mapped_nodes);
    /* setup a working list so we can put the final list
     * of nodes in order. This way, if the user specifies a
     * set of nodes, we will use them in the order in which
     * they were specifed. Note that empty node requests
     * will always be appended to the end
     */
    OBJ_CONSTRUCT(&keep, opal_list_t);
    
    for (i = 0; i < len_mapped_node; ++i) {
        /* check if we are supposed to add some number of empty
         * nodes here
         */
        if ('*' == mapped_nodes[i][0]) {
            /* if there is a number after the '*', then we are
             * to insert a specific # of nodes
             */
            if ('\0' == mapped_nodes[i][1]) {
                /* take all empty nodes from the list */
                num_empty = INT_MAX;
            } else {
                /* extract number of nodes to take */
                num_empty = strtol(&mapped_nodes[i][1], NULL, 10);
            }
            /* search for empty nodes and take them */
            item = opal_list_get_first(nodes);
            while (0 < num_empty && item != opal_list_get_end(nodes)) {
                next = opal_list_get_next(item);  /* save this position */
                node = (orte_node_t*)item;
                /* see if this node is empty */
                if (0 == node->slots_inuse) {
                    /* check to see if it is specified later */
                    for (j=i+1; j < len_mapped_node; j++) {
                        if (0 == strcmp(mapped_nodes[j], node->name)) {
                            /* specified later - skip this one */
                            goto skipnode;
                        }
                    }
                    /* remove item from list */
                    opal_list_remove_item(nodes, item);
                    /* xfer to keep list */
                    opal_list_append(&keep, item);
                    --num_empty;
                }
            skipnode:
                item = next;
            }
        } else {
            /* we are looking for a specific node on the list
             * we have a match if one of two conditions is met:
             * 1. the node_name and mapped_nodes directly match
             * 2. the node_name is the local system name AND
             *    either the mapped_node is "localhost" OR it
             *    is a local interface as found by opal_ifislocal
             */
            item = opal_list_get_first(nodes);
            while (item != opal_list_get_end(nodes)) {
                next = opal_list_get_next(item);  /* save this position */
                node = (orte_node_t*)item;
                /* search -host list to see if this one is found */
                found = false;
                if ((0 == strcmp(node->name, mapped_nodes[i]) ||
                    (0 == strcmp(node->name, orte_process_info.nodename) &&
                    (0 == strcmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i]))))) {
                    /* remove item from list */
                    opal_list_remove_item(nodes, item);
                    /* xfer to keep list */
                    opal_list_append(&keep, item);
                    break;
                }
                item = next;
            }
        }
        /* done with the mapped entry */
        free(mapped_nodes[i]);
        mapped_nodes[i] = NULL;
    }

    /* was something specified that was -not- found? */
    for (i=0; i < len_mapped_node; i++) {
        if (NULL != mapped_nodes[i]) {
            orte_show_help("help-dash-host.txt", "not-all-mapped-alloc",
                           true, mapped_nodes[i]);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
    }
    
    /* clear the rest of the nodes list */
    while (NULL != (item = opal_list_remove_first(nodes))) {
        OBJ_RELEASE(item);
    }
    
    /* the nodes list has been cleared - rebuild it in order */
    while (NULL != (item = opal_list_remove_first(&keep))) {
        opal_list_append(nodes, item);
    }
    
    /* did they ask for more than we could provide */
    if (!want_all_empty && 0 < num_empty) {
        orte_show_help("help-dash-host.txt", "dash-host:not-enough-empty",
                       true, num_empty);
        rc = ORTE_ERR_SILENT;
        goto cleanup;
    }
    
    rc = ORTE_SUCCESS;
    /* done filtering existing list */
    
cleanup:
    for (i=0; i < len_mapped_node; i++) {
        if (NULL != mapped_nodes[i]) {
            free(mapped_nodes[i]);
            mapped_nodes[i] = NULL;
        }
    }
    if (NULL != mapped_nodes) {
        free(mapped_nodes);
    }
    
    return rc;
}
Example #18
0
static int __create_fca_comm(mca_coll_fca_module_t *fca_module)
{
    int rc, ret;
    int result = MPI_UNEQUAL;
    mca_coll_fca_c_cache_item_t *c_item = NULL, *c_item_new = NULL;
    mca_coll_fca_component_t *cm = &mca_coll_fca_component;
    int comm_size = ompi_comm_size(fca_module->comm);
    ompi_communicator_t *comm = fca_module->comm;
    opal_list_t *c_cache;
    struct timeval start, end, seq_start, seq_end, par_start, par_end;
    int act_deep;


    if(mca_coll_fca_component.fca_verbose == 10) {
        gettimeofday(&start, NULL);
    }

    if(mca_coll_fca_component.fca_enable_cache) {

        c_cache = &cm->c_cache;

        if(mca_coll_fca_component.fca_enable_hash){

            int  grank = ORTE_PROC_MY_NAME->vpid;
            int hash_index, part_of_hash_index;

            if(mca_coll_fca_component.fca_parallel_hash_calc == 1) {

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&par_start, NULL);
                }

                part_of_hash_index = modular_pow(cm->fca_primes[grank % cm->fca_number_of_primes], grank, cm->fca_hash_size);
                rc = comm->c_coll.coll_allreduce(&part_of_hash_index, &hash_index, 1, MPI_INT, MPI_SUM, comm, comm->c_coll.coll_allreduce_module);
                if (rc != OMPI_SUCCESS) {
                    FCA_ERROR("Failed to reduce hash_index: %d", rc);
                    return rc;
                }

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&par_end, NULL);
                    mca_coll_fca_component.fca_work_time_parallel =+
                        par_end.tv_sec - par_start.tv_sec + 1e-6 * (par_end.tv_usec - par_start.tv_usec);
                }
            }else{

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&seq_start, NULL);
                }

                hash_index = 0;
                int q_counter = 0;
                int q_comm_size = ompi_comm_size (comm);

                for(q_counter = 0; q_counter < q_comm_size; q_counter++)
                {
                    hash_index += modular_pow(cm->fca_primes[q_counter % cm->fca_number_of_primes], q_counter, cm->fca_hash_size);
                }

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&seq_end, NULL);
                    mca_coll_fca_component.fca_work_time_sequency =+
                        seq_end.tv_sec - seq_start.tv_sec + 1e-6 * (seq_end.tv_usec - seq_start.tv_usec);
                }

            }

            if(cm->fca_hash[hash_index % cm->fca_hash_size] != NULL)
            {
                c_cache = cm->fca_hash[hash_index % cm->fca_hash_size];
                if(mca_coll_fca_component.fca_verbose == 10) {
                    gettimeofday(&end, NULL);
                    mca_coll_fca_component.fca_total_work_time =+
                        end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);
                    mca_coll_fca_component.fca_hash_hit += 1;
                }
            }else
            {
                if(mca_coll_fca_component.fca_verbose == 10) {
                    mca_coll_fca_component.fca_hash_miss += 1;
                }
                c_cache = OBJ_NEW(opal_list_t);
                cm->fca_hash[hash_index % cm->fca_hash_size] = c_cache;
            }

        }

        act_deep = 0;
        for( c_item = (mca_coll_fca_c_cache_item_t *)opal_list_get_first(c_cache);
                c_item != (mca_coll_fca_c_cache_item_t *)opal_list_get_end(c_cache);
                c_item = (mca_coll_fca_c_cache_item_t *)opal_list_get_next((opal_list_item_t *) c_item)){
            act_deep++;
            /* first check the size */
            if( c_item && (comm_size == c_item->size)) {
                /* then we have a potential cache hit */
                ompi_comm_compare(comm, c_item->comm, &result);
                if( MPI_CONGRUENT == result) {
                    /* cache hit! Return the context and be done with it */
                    /* first bump the score */
                    ret = fca_comm_get_caps(c_item->fca_comm_wrap->fca_comm, &fca_module->fca_comm_caps);
                    if (ret < 0) {
                        FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(ret));
                        return OMPI_ERROR;
                    }
                    fca_module->fca_comm = c_item->fca_comm_wrap->fca_comm;

                    if(mca_coll_fca_component.fca_verbose == 10) {
                        gettimeofday(&end, NULL);

                        mca_coll_fca_component.fca_total_work_time =+
                            end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);

                        mca_coll_fca_component.fca_cache_hit += 1;

                        if(act_deep>mca_coll_fca_component.fca_max_deep_in_cache)
                            mca_coll_fca_component.fca_max_deep_in_cache = act_deep;
                    }
                    return OMPI_SUCCESS;
                }
            }
        }
    }
    rc = __fca_comm_new(fca_module);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }
#if OMPI_FCA_VERSION < 30
    /* allocate comm_init_spec */
    FCA_MODULE_VERBOSE(fca_module, 1, "Starting COMM_INIT comm_id %d proc_idx %d num_procs %d",
                       fca_module->fca_comm_desc.comm_id, fca_module->local_proc_idx,
                       fca_module->num_local_procs);

    ret = mca_coll_fca_comm_init(mca_coll_fca_component.fca_context,
                                 fca_module->rank, ompi_comm_size(fca_module->comm),
                                 fca_module->local_proc_idx, fca_module->num_local_procs,
                                 &fca_module->fca_comm_desc, &fca_module->fca_comm);
    if (ret < 0) {
        FCA_ERROR("COMM_INIT failed: %s", fca_strerror(ret));
        return OMPI_ERROR;
     }
#endif
    /* get communicator capabilities */
    ret = fca_comm_get_caps(fca_module->fca_comm, &fca_module->fca_comm_caps);
    if (ret < 0) {
        FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(ret));
        return OMPI_ERROR;
    }

    /* by this point every rank in the communicator is set up */
    FCA_MODULE_VERBOSE(fca_module, 1, "Initialized FCA communicator, comm_id %d",
            fca_module->fca_comm_desc.comm_id);
    if(mca_coll_fca_component.fca_enable_cache) {

        c_item_new = OBJ_NEW(mca_coll_fca_c_cache_item_t);
        c_item_new->fca_comm_wrap = OBJ_NEW(mca_coll_fca_comm_wrap_t);

        OBJ_RETAIN(comm);

        c_item_new->size = comm_size;
        c_item_new->comm = comm;
        c_item_new->fca_comm_wrap->fca_comm = fca_module->fca_comm;
        c_item_new->fca_comm_wrap->rank = fca_module->rank;
        c_item_new->fca_comm_wrap->comm_id = fca_module->fca_comm_desc.comm_id;

        opal_list_append(c_cache,(opal_list_item_t *) c_item_new);
    }

    if(mca_coll_fca_component.fca_verbose == 10) {

        gettimeofday(&end, NULL);

        mca_coll_fca_component.fca_total_work_time =+
            end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);

        mca_coll_fca_component.fca_cache_miss += 1;
    }
    return OMPI_SUCCESS;
}
Example #19
0
static int orte_ras_yarn_allocate_internal(int np, opal_list_t* nodes) {
    int rc, i;

    // create and send allocate message
    struct pbc_wmessage* msg = pbc_wmessage_new(orte_hdclient_pb_env, "AllocateRequestProto");
    if (!msg) {
        opal_output(0, "%s ras:yarn failed to create AllocateRequestProto", 
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERROR;
    }
    pbc_wmessage_integer(msg, "resource_count", np, 0);
    rc = orte_hdclient_send_message_and_delete(msg, HAMSTER_MSG_ALLOCATE);
    if (rc != 0) {
        opal_output(0, "%s ras:yarn error happened when send allocate msg to AM",
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERROR;
    }

    // read rmessage out
    struct pbc_rmessage* response = orte_hdclient_recv_message("AllocateResponseProto");
    if (!response) {
        opal_output(0, "%s ras:yarn error happened when recv allocate response from AM",
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        return ORTE_ERROR;
    }

    int n = pbc_rmessage_size(response, "node_resources");
    if (n <= 0) {
        opal_output(0, "%s ras:yarn got n(=%d) <= 0, please check",
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), n);
        return ORTE_ERROR;
    }

    // read node resources
    for (i = 0; i < n; i++) {
        struct pbc_rmessage* node_res = pbc_rmessage_message(response, "node_resources", i);
        if (!node_res) {
            opal_output(0, "%s ras:yarn error when parse returned resource from AM", 
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            return ORTE_ERROR;
        }
        
        // parse host, slot
        const char* host = pbc_rmessage_string(node_res, "host_name", 0, NULL);
        if (!host) {
            opal_output(0, "%s ras:yarn error when parse host from returned resource from AM",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            return ORTE_ERROR;
        }
        int slot = pbc_rmessage_integer(node_res, "slot", 0, NULL);

        // make node_t and add it to nodes
        orte_node_t* node = OBJ_NEW(orte_node_t);
        if (!node) {
            opal_output(0, "%s ras:yarn failed to create orte_node_t obj", 
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            return ORTE_ERROR;
        }

        node->name = strdup(host);
        node->state = ORTE_NODE_STATE_UP;
        node->slots_inuse = 0;
        node->slots_max = 0;
        node->slots = slot;
        opal_list_append(nodes, &node->super);

        OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
                     "%s ras:yarn: adding node %s with %d slot",
                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                     host, slot));
    }

    // All done
    OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                         "%s ras:yarn:allocate: success",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    return ORTE_SUCCESS;
}
static inline int mca_oob_ud_device_setup (mca_oob_ud_device_t *device,
                                           struct ibv_device *ib_device)
{
    int rc, port_num;
    struct ibv_device_attr dev_attr;

    OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup attempting to setup ib device %p",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *) ib_device));

    device->ib_context = ibv_open_device (ib_device);
    if (NULL == device->ib_context) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup error opening device. errno = %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
        return ORTE_ERROR;
    }

    rc = ibv_query_device (device->ib_context, &dev_attr); 
    if (0 != rc) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup error querying device. errno = %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
        return ORTE_ERROR;
    }

    device->ib_channel = ibv_create_comp_channel (device->ib_context);
    if (NULL == device->ib_channel) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup error completing completion channel."
                             "errno = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
        return ORTE_ERROR;
    }

    device->ib_pd = ibv_alloc_pd (device->ib_context);
    if (NULL == device->ib_pd) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup error allocating protection domain."
                             "errno = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
        return ORTE_ERROR;
    }

    for (port_num = 1 ; port_num <= dev_attr.phys_port_cnt ; ++port_num) {
        mca_oob_ud_port_t *port = OBJ_NEW(mca_oob_ud_port_t);

        if (NULL == port) {
            opal_output (0, "oob:ud:device_setup malloc failure. errno = %d", errno);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        port->device = device;
        port->port_num = port_num;

        rc = mca_oob_ud_port_setup (port);
        if (ORTE_SUCCESS != rc) {
            OBJ_RELEASE(port);
            continue;
        }

        opal_list_append (&device->ports, (opal_list_item_t *) port);

	break;
    }

    if (0 == opal_list_get_size(&device->ports)) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup could not init device. no usable "
                             "ports present", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return ORTE_ERROR;
    }

    return ORTE_SUCCESS;
}
Example #21
0
/**
 * this routine assumes that sorted_procs is in the following state:
 *     o all the local procs at the beginning.
 *     o sorted_procs[0] is the lowest named process.
 */
int
mca_common_sm_rml_info_bcast(opal_shmem_ds_t *ds_buf,
                             ompi_proc_t **procs,
                             size_t num_procs,
                             int tag,
                             bool bcast_root,
                             char *msg_id_str,
                             opal_list_t *pending_rml_msgs)
{
    int rc = OMPI_SUCCESS;
    struct iovec iov[MCA_COMMON_SM_RML_MSG_LEN];
    int iovrc;
    size_t p;
    char msg_id_str_to_tx[OPAL_PATH_MAX];

    strncpy(msg_id_str_to_tx, msg_id_str, sizeof(msg_id_str_to_tx) - 1);

    /* let the first item be the queueing id name */
    iov[0].iov_base = (ompi_iov_base_ptr_t)msg_id_str_to_tx;
    iov[0].iov_len = sizeof(msg_id_str_to_tx);
    iov[1].iov_base = (ompi_iov_base_ptr_t)ds_buf;
    iov[1].iov_len = sizeof(opal_shmem_ds_t);

    /* figure out if i am the root proc in the group.
     * if i am, bcast the message the rest of the local procs.
     */
    if (bcast_root) {
        opal_progress_event_users_increment();
        /* first num_procs items should be local procs */
        for (p = 1; p < num_procs; ++p) {
            iovrc = orte_rml.send(&(procs[p]->proc_name), iov,
                                  MCA_COMMON_SM_RML_MSG_LEN, tag, 0);
            if ((ssize_t)(iov[0].iov_len + iov[1].iov_len) > iovrc) {
                ORTE_ERROR_LOG(OMPI_ERR_COMM_FAILURE);
                opal_progress_event_users_decrement();
                rc = OMPI_ERROR;
                goto out;
            }
        }
        opal_progress_event_users_decrement();
    }
    else { /* i am NOT the root ("lowest") proc */
        opal_list_item_t *item;
        mca_common_sm_rml_pending_rml_msg_types_t *rml_msg;
        /* because a component query can be performed simultaneously in multiple
         * threads, the RML messages may arrive in any order.  so first check to
         * see if we previously received a message for me.
         */
        for (item = opal_list_get_first(pending_rml_msgs);
             opal_list_get_end(pending_rml_msgs) != item;
             item = opal_list_get_next(item)) {
            rml_msg = (mca_common_sm_rml_pending_rml_msg_types_t *)item;
            /* was the message for me? */
            if (0 == strcmp(rml_msg->msg_id_str, msg_id_str)) {
                opal_list_remove_item(pending_rml_msgs, item);
                /*                 from ==============> to */
                opal_shmem_ds_copy(&rml_msg->shmem_ds, ds_buf);
                OBJ_RELEASE(item);
                break;
            }
        }
        /* if we didn't find a message already waiting, block on receiving from
         * the RML.
         */
        if (opal_list_get_end(pending_rml_msgs) == item) {
            do {
                /* bump up the libevent polling frequency while we're in this
                 * RML recv, just to ensure we're checking libevent frequently.
                 */
                opal_progress_event_users_increment();
                iovrc = orte_rml.recv(&(procs[0]->proc_name), iov,
                                      MCA_COMMON_SM_RML_MSG_LEN, tag, 0);
                opal_progress_event_users_decrement();
                if (iovrc < 0) {
                    ORTE_ERROR_LOG(OMPI_ERR_RECV_LESS_THAN_POSTED);
                    rc = OMPI_ERROR;
                    goto out;
                }
                /* was the message for me?  if so, we're done */
                if (0 == strcmp(msg_id_str_to_tx, msg_id_str)) {
                    break;
                }
                /* if not, put it on the pending list and try again */
                if (NULL == (rml_msg =
                            OBJ_NEW(mca_common_sm_rml_pending_rml_msg_types_t)))
                {
                    ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
                    rc = OMPI_ERROR;
                    goto out;
                }
                /* not for me, so place on list */
                /*                 from ========> to */
                opal_shmem_ds_copy(ds_buf, &rml_msg->shmem_ds);
                memcpy(rml_msg->msg_id_str, msg_id_str_to_tx, OPAL_PATH_MAX);
                opal_list_append(pending_rml_msgs, &(rml_msg->super));
            } while(1);
        }
    }

out:
    return rc;
}
Example #22
0
static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd)
{
    int flags;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    orte_iof_sink_t *sink;
    char *outfile;
    int fdout;
    orte_job_t *jobdat=NULL;
    int np, numdigs;
    orte_ns_cmp_bitmask_t mask;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s iof:orted pushing fd %d for process %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         fd, ORTE_NAME_PRINT(dst_name)));
    
    /* set the file descriptor to non-blocking - do this before we setup
     * and activate the read event in case it fires right away
     */
    if((flags = fcntl(fd, F_GETFL, 0)) < 0) {
        opal_output(orte_iof_base.iof_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n", 
                    __FILE__, __LINE__, errno);
    } else {
        flags |= O_NONBLOCK;
        fcntl(fd, F_SETFL, flags);
    }

    /* do we already have this process in our list? */
    for (item = opal_list_get_first(&mca_iof_orted_component.procs);
         item != opal_list_get_end(&mca_iof_orted_component.procs);
         item = opal_list_get_next(item)) {
        proct = (orte_iof_proc_t*)item;
        
        mask = ORTE_NS_CMP_ALL;

        if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, dst_name)) {
            /* found it */
            goto SETUP;
        }
    }
    /* if we get here, then we don't yet have this proc in our list */
    proct = OBJ_NEW(orte_iof_proc_t);
    proct->name.jobid = dst_name->jobid;
    proct->name.vpid = dst_name->vpid;
    opal_list_append(&mca_iof_orted_component.procs, &proct->super);
    /* see if we are to output to a file */
    if (NULL != orte_output_filename) {
        /* get the local jobdata for this proc */
        if (NULL == (jobdat = orte_get_job_data_object(proct->name.jobid))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        np = jobdat->num_procs / 10;
        /* determine the number of digits required for max vpid */
        numdigs = 1;
        while (np > 0) {
            numdigs++;
            np = np / 10;
        }
        /* construct the filename */
        asprintf(&outfile, "%s.%d.%0*lu", orte_output_filename,
                 (int)ORTE_LOCAL_JOBID(proct->name.jobid),
                 numdigs, (unsigned long)proct->name.vpid);
        /* create the file */
        fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
        free(outfile);
        if (fdout < 0) {
            /* couldn't be opened */
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
        /* define a sink to that file descriptor */
        ORTE_IOF_SINK_DEFINE(&sink, dst_name, fdout, ORTE_IOF_STDOUTALL,
                             orte_iof_base_write_handler,
                             &mca_iof_orted_component.sinks);
    }
    
SETUP:
    /* define a read event and activate it */
    if (src_tag & ORTE_IOF_STDOUT) {
        ORTE_IOF_READ_EVENT(&proct->revstdout, dst_name, fd, ORTE_IOF_STDOUT,
                            orte_iof_orted_read_handler, false);
    } else if (src_tag & ORTE_IOF_STDERR) {
        ORTE_IOF_READ_EVENT(&proct->revstderr, dst_name, fd, ORTE_IOF_STDERR,
                            orte_iof_orted_read_handler, false);
    } else if (src_tag & ORTE_IOF_STDDIAG) {
        ORTE_IOF_READ_EVENT(&proct->revstddiag, dst_name, fd, ORTE_IOF_STDDIAG,
                            orte_iof_orted_read_handler, false);
    }
    /* if -all- of the readevents for this proc have been defined, then
     * activate them. Otherwise, we can think that the proc is complete
     * because one of the readevents fires -prior- to all of them having
     * been defined!
     */
    if (NULL != proct->revstdout && NULL != proct->revstderr && NULL != proct->revstddiag) {
        proct->revstdout->active = true;
        opal_event_add(proct->revstdout->ev, 0);
        proct->revstderr->active = true;
        opal_event_add(proct->revstderr->ev, 0);
        proct->revstddiag->active = true;
        opal_event_add(proct->revstddiag->ev, 0);
    }
    return ORTE_SUCCESS;
}
Example #23
0
int orte_daemon(int argc, char *argv[])
{
    int ret = 0;
    opal_cmd_line_t *cmd_line = NULL;
    char *rml_uri;
    int i;
    opal_buffer_t *buffer;
    char hostname[100];
    char *tmp_env_var = NULL;
    
    /* initialize the globals */
    memset(&orted_globals, 0, sizeof(orted_globals));
    /* initialize the singleton died pipe to an illegal value so we can detect it was set */
    orted_globals.singleton_died_pipe = -1;
    /* init the failure orted vpid to an invalid value */
    orted_globals.fail = ORTE_VPID_INVALID;
    
    /* setup to check common command line options that just report and die */
    cmd_line = OBJ_NEW(opal_cmd_line_t);
    if (OPAL_SUCCESS != opal_cmd_line_create(cmd_line, orte_cmd_line_opts)) {
        OBJ_RELEASE(cmd_line);
        exit(1);
    }
    mca_base_cmd_line_setup(cmd_line);
    if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, false,
                                                   argc, argv))) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        fprintf(stderr, "Usage: %s [OPTION]...\n%s\n", argv[0], args);
        free(args);
        OBJ_RELEASE(cmd_line);
        return ret;
    }
    
    /*
     * Since this process can now handle MCA/GMCA parameters, make sure to
     * process them.
     */
    mca_base_cmd_line_process_args(cmd_line, &environ, &environ);
    
    /* Ensure that enough of OPAL is setup for us to be able to run */
    /*
     * NOTE: (JJH)
     *  We need to allow 'mca_base_cmd_line_process_args()' to process command
     *  line arguments *before* calling opal_init_util() since the command
     *  line could contain MCA parameters that affect the way opal_init_util()
     *  functions. AMCA parameters are one such option normally received on the
     *  command line that affect the way opal_init_util() behaves.
     *  It is "safe" to call mca_base_cmd_line_process_args() before 
     *  opal_init_util() since mca_base_cmd_line_process_args() does *not*
     *  depend upon opal_init_util() functionality.
     */
    if (OPAL_SUCCESS != opal_init_util(&argc, &argv)) {
        fprintf(stderr, "OPAL failed to initialize -- orted aborting\n");
        exit(1);
    }

    /* save the environment for launch purposes. This MUST be
     * done so that we can pass it to any local procs we
     * spawn - otherwise, those local procs won't see any
     * non-MCA envars that were set in the enviro when the
     * orted was executed - e.g., by .csh
     */
    orte_launch_environ = opal_argv_copy(environ);
    
    /* purge any ess flag set in the environ when we were launched */
    opal_unsetenv("OMPI_MCA_ess", &orte_launch_environ);
    
    /* if orte_daemon_debug is set, let someone know we are alive right
     * away just in case we have a problem along the way
     */
    if (orted_globals.debug) {
        gethostname(hostname, 100);
        fprintf(stderr, "Daemon was launched on %s - beginning to initialize\n", hostname);
    }
    
    /* check for help request */
    if (orted_globals.help) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(cmd_line);
        orte_show_help("help-orted.txt", "orted:usage", false,
                       argv[0], args);
        free(args);
        return 1;
    }
#if defined(HAVE_SETSID) && !defined(__WINDOWS__)
    /* see if we were directed to separate from current session */
    if (orted_globals.set_sid) {
        setsid();
    }
#endif  /* !defined(__WINDOWS__) */
    /* see if they want us to spin until they can connect a debugger to us */
    i=0;
    while (orted_spin_flag) {
        i++;
        if (1000 < i) i=0;        
    }

#if OPAL_ENABLE_FT_CR == 1
    /* Mark as a tool program */
    tmp_env_var = mca_base_param_env_var("opal_cr_is_tool");
    opal_setenv(tmp_env_var,
                "1",
                true, &environ);
    free(tmp_env_var);
#endif
    tmp_env_var = NULL; /* Silence compiler warning */

    /* if mapreduce set, flag it */
    if (orted_globals.mapreduce) {
        orte_map_reduce = true;
    }

    /* Set the flag telling OpenRTE that I am NOT a
     * singleton, but am "infrastructure" - prevents setting
     * up incorrect infrastructure that only a singleton would
     * require.
     */
    if (orted_globals.hnp) {
        if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_HNP))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
    } else {
        if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_DAEMON))) {
            ORTE_ERROR_LOG(ret);
            return ret;
        }
    }
    /* finalize the OPAL utils. As they are opened again from orte_init->opal_init
     * we continue to have a reference count on them. So we have to finalize them twice...
     */
    opal_finalize_util();

    if ((int)ORTE_VPID_INVALID != orted_globals.fail) {
        orted_globals.abort=false;
        /* some vpid was ordered to fail. The value can be positive
         * or negative, depending upon the desired method for failure,
         * so need to check both here
         */
        if (0 > orted_globals.fail) {
            orted_globals.fail = -1*orted_globals.fail;
            orted_globals.abort = true;
        }
        /* are we the specified vpid? */
        if ((int)ORTE_PROC_MY_NAME->vpid == orted_globals.fail) {
            /* if the user specified we delay, then setup a timer
             * and have it kill us
             */
            if (0 < orted_globals.fail_delay) {
                ORTE_TIMER_EVENT(orted_globals.fail_delay, 0, shutdown_callback, ORTE_SYS_PRI);
                
            } else {
                opal_output(0, "%s is executing clean %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            orted_globals.abort ? "abort" : "abnormal termination");

                /* do -not- call finalize as this will send a message to the HNP
                 * indicating clean termination! Instead, just forcibly cleanup
                 * the local session_dir tree and exit
                 */
                orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
                
                /* if we were ordered to abort, do so */
                if (orted_globals.abort) {
                    abort();
                }
                
                /* otherwise, return with non-zero status */
                ret = ORTE_ERROR_DEFAULT_EXIT_CODE;
                goto DONE;
            }
        }
    }

    /* detach from controlling terminal
     * otherwise, remain attached so output can get to us
     */
    if(!orte_debug_flag &&
       !orte_debug_daemons_flag &&
       orted_globals.daemonize) {
        opal_daemon_init(NULL);
    }
    
    /* insert our contact info into our process_info struct so we
     * have it for later use and set the local daemon field to our name
     */
    orte_process_info.my_daemon_uri = orte_rml.get_contact_info();
    ORTE_PROC_MY_DAEMON->jobid = ORTE_PROC_MY_NAME->jobid;
    ORTE_PROC_MY_DAEMON->vpid = ORTE_PROC_MY_NAME->vpid;
    
    /* if I am also the hnp, then update that contact info field too */
    if (ORTE_PROC_IS_HNP) {
        orte_process_info.my_hnp_uri = orte_rml.get_contact_info();
        ORTE_PROC_MY_HNP->jobid = ORTE_PROC_MY_NAME->jobid;
        ORTE_PROC_MY_HNP->vpid = ORTE_PROC_MY_NAME->vpid;
    }
    
    /* setup the primary daemon command receive function */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON,
                                  ORTE_RML_PERSISTENT, orte_daemon_recv, NULL);
    if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
        ORTE_ERROR_LOG(ret);
        goto DONE;
    }
    
    /* output a message indicating we are alive, our name, and our pid
     * for debugging purposes
     */
    if (orte_debug_daemons_flag) {
        fprintf(stderr, "Daemon %s checking in as pid %ld on host %s\n",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)orte_process_info.pid,
                orte_process_info.nodename);
    }

    /* We actually do *not* want the orted to voluntarily yield() the
       processor more than necessary.  The orted already blocks when
       it is doing nothing, so it doesn't use any more CPU cycles than
       it should; but when it *is* doing something, we do not want it
       to be unnecessarily delayed because it voluntarily yielded the
       processor in the middle of its work.

       For example: when a message arrives at the orted, we want the
       OS to wake up the orted in a timely fashion (which most OS's
       seem good about doing) and then we want the orted to process
       the message as fast as possible.  If the orted yields and lets
       aggressive MPI applications get the processor back, it may be a
       long time before the OS schedules the orted to run again
       (particularly if there is no IO event to wake it up).  Hence,
       routed OOB messages (for example) may be significantly delayed
       before being delivered to MPI processes, which can be
       problematic in some scenarios (e.g., COMM_SPAWN, BTL's that
       require OOB messages for wireup, etc.). */
    opal_progress_set_yield_when_idle(false);

    /* Change the default behavior of libevent such that we want to
       continually block rather than blocking for the default timeout
       and then looping around the progress engine again.  There
       should be nothing in the orted that cannot block in libevent
       until "something" happens (i.e., there's no need to keep
       cycling through progress because the only things that should
       happen will happen in libevent).  This is a minor optimization,
       but what the heck... :-) */
    opal_progress_set_event_flag(OPAL_EVLOOP_ONCE);

    /* if requested, obtain and report a new process name and my uri to the indicated pipe */
    if (orted_globals.uri_pipe > 0) {
        orte_job_t *jdata;
        orte_proc_t *proc;
        orte_node_t *node;
        orte_app_context_t *app;
        char *tmp, *nptr, *sysinfo;
        int32_t ljob;

        /* setup the singleton's job */
        jdata = OBJ_NEW(orte_job_t);
        orte_plm_base_create_jobid(jdata);
        ljob = ORTE_LOCAL_JOBID(jdata->jobid);
        opal_pointer_array_set_item(orte_job_data, ljob, jdata);

        /* must create a map for it (even though it has no
         * info in it) so that the job info will be picked
         * up in subsequent pidmaps or other daemons won't
         * know how to route
         */
        jdata->map = OBJ_NEW(orte_job_map_t);

        /* setup an app_context for the singleton */
        app = OBJ_NEW(orte_app_context_t);
        app->app = strdup("singleton");
        app->num_procs = 1;
        opal_pointer_array_add(jdata->apps, app);
        
#if 0
        /* run our local allocator to read the available
         * allocation in case this singleton decides to
         * comm_spawn other procs
         */
        if (ORTE_SUCCESS != (ret = orte_ras.allocate(jdata))) {
            ORTE_ERROR_LOG(ret);
            /* don't quit as this would cause the singleton
             * to hang!
             */
        }
#endif
        
        /* setup a proc object for the singleton - since we
         * -must- be the HNP, and therefore we stored our
         * node on the global node pool, and since the singleton
         * -must- be on the same node as us, indicate that
         */
        proc = OBJ_NEW(orte_proc_t);
        proc->name.jobid = jdata->jobid;
        proc->name.vpid = 0;
        proc->alive = true;
        proc->state = ORTE_PROC_STATE_RUNNING;
        proc->app_idx = 0;
        /* obviously, it is on my node */
        node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
        proc->node = node;
        OBJ_RETAIN(node);  /* keep accounting straight */
        opal_pointer_array_add(jdata->procs, proc);
        jdata->num_procs = 1;
        /* and obviously it is one of my local procs */
        OBJ_RETAIN(proc);
        opal_pointer_array_add(orte_local_children, proc);
        jdata->num_local_procs = 1;
        /* set the trivial */
        proc->local_rank = 0;
        proc->node_rank = 0;
        proc->app_rank = 0;
        proc->state = ORTE_PROC_STATE_RUNNING;
        proc->alive = true;
        proc->app_idx = 0;
        proc->local_proc = true;
#if OPAL_HAVE_HWLOC
        proc->bind_idx = 0;
#endif

        /* the singleton will use the first three collectives
         * for its modex/barriers
         */
        orte_grpcomm_base.coll_id += 3;

        /* need to setup a pidmap for it */
        jdata->pmap = (opal_byte_object_t*)malloc(sizeof(opal_byte_object_t));
        if (ORTE_SUCCESS != (ret = orte_util_encode_pidmap(jdata->pmap))) {
            ORTE_ERROR_LOG(ret);
            goto DONE;
        }
    
    
        /* if we don't yet have a daemon map, then we have to generate one
         * to pass back to it
         */
        if (NULL == orte_odls_globals.dmap) {
            orte_odls_globals.dmap = (opal_byte_object_t*)malloc(sizeof(opal_byte_object_t));
            /* construct a nodemap */
            if (ORTE_SUCCESS != (ret = orte_util_encode_nodemap(orte_odls_globals.dmap))) {
                ORTE_ERROR_LOG(ret);
                goto DONE;
            }
        }

        /* create a string that contains our uri + the singleton's name + sysinfo */
        orte_util_convert_process_name_to_string(&nptr, &proc->name);
        orte_util_convert_sysinfo_to_string(&sysinfo, orte_local_cpu_type, orte_local_cpu_model);
        asprintf(&tmp, "%s[%s][%s]", orte_process_info.my_daemon_uri, nptr, sysinfo);
        free(nptr);
	free(sysinfo);

        /* pass that info to the singleton */
#ifndef __WINDOWS__
        write(orted_globals.uri_pipe, tmp, strlen(tmp)+1); /* need to add 1 to get the NULL */
#else
        send(orted_globals.uri_pipe, tmp, strlen(tmp)+1, 0); /* need to add 1 to get the NULL */
#endif

        /* cleanup */
        free(tmp);
    }

    /* if we were given a pipe to monitor for singleton termination, set that up */
    if (orted_globals.singleton_died_pipe > 0) {
        /* register shutdown handler */
        pipe_handler = (opal_event_t*)malloc(sizeof(opal_event_t));
        opal_event_set(orte_event_base, pipe_handler,
                       orted_globals.singleton_died_pipe,
                       OPAL_EV_READ,
                       pipe_closed,
                       pipe_handler);
        opal_event_add(pipe_handler, NULL);
    }

    /* If I have a parent, then save his contact info so
     * any messages we send can flow thru him.
     */
    mca_base_param_reg_string_name("orte", "parent_uri",
                                   "URI for the parent if tree launch is enabled.",
                                   true, false, NULL,  &rml_uri);
    if (NULL != rml_uri) {
        orte_process_name_t parent;

        /* set the contact info into the hash table */
        if (ORTE_SUCCESS != (ret = orte_rml.set_contact_info(rml_uri))) {
            ORTE_ERROR_LOG(ret);
            free(rml_uri);
            goto DONE;
        }
        ret = orte_rml_base_parse_uris(rml_uri, &parent, NULL );
        if( ORTE_SUCCESS != ret ) {
            ORTE_ERROR_LOG(ret);
            free(rml_uri);
            goto DONE;
        }
        free(rml_uri);
        /* tell the routed module that we have a path
         * back to the HNP
         */
        if (ORTE_SUCCESS != (ret = orte_routed.update_route(ORTE_PROC_MY_HNP, &parent))) {
            ORTE_ERROR_LOG(ret);
            goto DONE;
        }
        /* set the lifeline to point to our parent so that we
         * can handle the situation if that lifeline goes away
         */
        if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(&parent))) {
            ORTE_ERROR_LOG(ret);
            goto DONE;
        }
    }

    /* if we are not the HNP...the only time we will be an HNP
     * is if we are launched by a singleton to provide support
     * for it
     */
    if (!ORTE_PROC_IS_HNP) {
        /* send the information to the orted report-back point - this function
         * will process the data, but also counts the number of
         * orteds that reported back so the launch procedure can continue.
         * We need to do this at the last possible second as the HNP
         * can turn right around and begin issuing orders to us
         */

        buffer = OBJ_NEW(opal_buffer_t);
        /* insert our name for rollup purposes */
        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(buffer);
            goto DONE;
        }
        /* for now, always include our contact info, even if we are using
         * static ports. Eventually, this will be removed
         */
        rml_uri = orte_rml.get_contact_info();
        if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &rml_uri, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(ret);
            OBJ_RELEASE(buffer);
            goto DONE;
        }

        /* include our node name */
        opal_dss.pack(buffer, &orte_process_info.nodename, 1, OPAL_STRING);
        
#if OPAL_HAVE_HWLOC
        /* add the local topology */
        if (NULL != opal_hwloc_topology &&
            (1 == ORTE_PROC_MY_NAME->vpid || orte_hetero_nodes)) {
            if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) {
                ORTE_ERROR_LOG(ret);
            }
        }
#endif

        if ((orte_static_ports || orte_use_common_port) && !orted_globals.tree_spawn) {
            /* use the rollup collective to send our data to the HNP
             * so we minimize the HNP bottleneck
             */
            orte_grpcomm_collective_t *coll;
            coll = OBJ_NEW(orte_grpcomm_collective_t);
            /* get the list of contributors we need from the routed module */
            orte_routed.get_routing_list(ORTE_GRPCOMM_COLL_PEERS, coll);
            /* add the collective to our list */
            opal_list_append(&orte_grpcomm_base.active_colls, &coll->super);
            /* send the buffer to ourselves to start the collective */
            if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, buffer,
                                                   ORTE_RML_TAG_ROLLUP, 0,
                                                   rml_cbfunc, NULL))) {
                ORTE_ERROR_LOG(ret);
                OBJ_RELEASE(buffer);
                goto DONE;
            }
        } else {
            /* send directly to the HNP's callback */
            if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buffer,
                                                   ORTE_RML_TAG_ORTED_CALLBACK, 0,
                                                   rml_cbfunc, NULL))) {
                ORTE_ERROR_LOG(ret);
                OBJ_RELEASE(buffer);
                goto DONE;
            }
        }
    }

    if (orte_debug_daemons_flag) {
        opal_output(0, "%s orted: up and running - waiting for commands!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
    }

    /* loop the event lib until an exit event is detected */
    while (orte_event_base_active) {
        opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE);
    }

    /* ensure all local procs are dead */
    orte_odls.kill_local_procs(NULL);

 DONE:
    /* update the exit status, in case it wasn't done */
    ORTE_UPDATE_EXIT_STATUS(orte_exit_status);

    /* cleanup and leave */
    orte_finalize();

    if (orte_debug_flag) {
        fprintf(stderr, "exiting with status %d\n", orte_exit_status);
    }
    exit(orte_exit_status);
}
Example #24
0
static int store(const orte_process_name_t *proc,
                 const char *key, const void *data, opal_data_type_t type)
{
    int i;
    job_data_t *jtable, *jtab;
    proc_data_t *proc_data;
    opal_value_t *kv;
    opal_byte_object_t *boptr;

    OPAL_OUTPUT_VERBOSE((5, orte_db_base.output,
                         "%s db:hash:store: storing key %s data type %d for proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         key, (int)type, ORTE_NAME_PRINT(proc)));

    /* get the job data object for this proc */
    jtable = NULL;
    for (i=0; i < job_data.size; i++) {
        if (NULL == (jtab = (job_data_t*)opal_pointer_array_get_item(&job_data, i))) {
            continue;
        }
        if (jtab->jobid == proc->jobid) {
            jtable = jtab;
            break;
        }
    }
    if (NULL == jtable) {
        /* need to add an entry for this job */
        jtable = OBJ_NEW(job_data_t);
        jtable->jobid = proc->jobid;
        opal_pointer_array_add(&job_data, jtable);
    }

    /* lookup the proc data object for this proc */
    if (NULL == (proc_data = lookup_orte_proc(jtable->data, proc->vpid))) {
        /* unrecoverable error */
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* see if we already have this key in the data - means we are updating
     * a pre-existing value
     */
    if (NULL != (kv = lookup_keyval(proc_data, key))) {
        opal_list_remove_item(&proc_data->data, &kv->super);
        OBJ_RELEASE(kv);
    }
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(key);
    opal_list_append(&proc_data->data, &kv->super);

    /* the type could come in as an ORTE one (e.g., ORTE_VPID). Since
     * the value is an OPAL definition, it cannot cover ORTE data
     * types, so convert to the underlying OPAL type
     */
    switch (type) {
    case OPAL_STRING:
        kv->type = OPAL_STRING;
        kv->data.string = strdup( (const char *) data);
        break;
    case ORTE_VPID:
    case OPAL_UINT32:
        kv->type = OPAL_UINT32;
        kv->data.uint32 = *(uint32_t*)data;
        break;
    case OPAL_UINT16:
        kv->type = OPAL_UINT16;
        kv->data.uint16 = *(uint16_t*)(data);
        break;
    case OPAL_INT:
        kv->type = OPAL_INT;
        kv->data.integer = *(int*)(data);
        break;
    case OPAL_UINT:
        kv->type = OPAL_UINT;
        kv->data.uint = *(unsigned int*)(data);
        break;
    case OPAL_BYTE_OBJECT:
        kv->type = OPAL_BYTE_OBJECT;
        boptr = (opal_byte_object_t*)data;
        kv->data.bo.bytes = (uint8_t *) malloc(boptr->size);
        memcpy(kv->data.bo.bytes, boptr->bytes, boptr->size);
        kv->data.bo.size = boptr->size;
        break;
    default:
        ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
        return ORTE_ERR_NOT_SUPPORTED;
    }

    return ORTE_SUCCESS;
}
Example #25
0
File: db_hash.c Project: IanYXXL/A1
static int fetch_multiple(const opal_identifier_t *uid,
                          opal_scope_t scope,
                          const char *key,
                          opal_list_t *kvs)
{
    proc_data_t *proc_data;
    opal_value_t *kv, *kvnew;
    int rc;
    char *srchkey, *ptr;
    size_t len = 0;
    opal_identifier_t id;

    /* to protect alignment, copy the data across */
    memcpy(&id, uid, sizeof(opal_identifier_t));

    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:hash:fetch_multiple: searching for key %s on proc %" PRIu64 "",
                         (NULL == key) ? "NULL" : key, id));

    /* lookup the proc data object for this proc */
    if (NULL == (proc_data = lookup_opal_proc(&hash_data, id))) {
        /* look elsewhere */
        return OPAL_ERR_TAKE_NEXT_OPTION;
    }

    /* if the key is NULL, then return all the values */
    if (NULL == key) {
        for (kv = (opal_value_t*) opal_list_get_first(&proc_data->data);
             kv != (opal_value_t*) opal_list_get_end(&proc_data->data);
             kv = (opal_value_t*) opal_list_get_next(kv)) {
            /* check for a matching scope */
            if (!(scope & kv->scope)) {
                continue;
            }
            if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kvnew, kv, OPAL_VALUE))) {
                OPAL_ERROR_LOG(rc);
                return rc;
            }
            opal_list_append(kvs, &kvnew->super);
        }
        return OPAL_SUCCESS;
    }

    /* see if the key includes a wildcard */
    srchkey = strdup(key);
    if (NULL != (ptr = strchr(srchkey, '*'))) {
        *ptr = '\0';
        len = strlen(srchkey);
    }

    /* otherwise, find all matching keys and return them */
    for (kv = (opal_value_t*) opal_list_get_first(&proc_data->data);
         kv != (opal_value_t*) opal_list_get_end(&proc_data->data);
         kv = (opal_value_t*) opal_list_get_next(kv)) {
        /* check for a matching scope */
        if (!(scope & kv->scope)) {
            continue;
        }
        if ((0 < len && 0 == strncmp(srchkey, kv->key, len)) ||
            (0 == len && 0 == strcmp(key, kv->key))) {
            if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kvnew, kv, OPAL_VALUE))) {
                OPAL_ERROR_LOG(rc);
                return rc;
            }
            opal_list_append(kvs, &kvnew->super);
        }
    }
    free(srchkey);
    return OPAL_SUCCESS;
}
Example #26
0
static int fetch_multiple(const orte_process_name_t *proc,
                          const char *key,
                          opal_list_t *kvs)
{
    int i;
    job_data_t *jtable, *jtab;
    proc_data_t *proc_data;
    opal_value_t *kv, *kvnew;
    int rc;
    char *srchkey, *ptr;
    size_t len = 0;

    OPAL_OUTPUT_VERBOSE((5, orte_db_base.output,
                         "%s db:hash:fetch_multiple: searching for key %s on proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (NULL == key) ? "NULL" : key, ORTE_NAME_PRINT(proc)));

    /* get the job data object for this proc */
    jtable = NULL;
    for (i=0; i < job_data.size; i++) {
        if (NULL == (jtab = (job_data_t*)opal_pointer_array_get_item(&job_data, i))) {
            continue;
        }
        if (jtab->jobid == proc->jobid) {
            jtable = jtab;
            break;
        }
    }
    if (NULL == jtable) {
        /* eventually, we will fetch this data - but for now, this
         * is simply an error
         */
        return ORTE_ERR_NOT_FOUND;
    }

    /* lookup the proc data object for this proc */
    if (NULL == (proc_data = lookup_orte_proc(jtable->data, proc->vpid))) {
        /* unrecoverable error */
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* if the key is NULL, then return all the values */
    if (NULL == key) {
        for (kv = (opal_value_t*) opal_list_get_first(&proc_data->data);
             kv != (opal_value_t*) opal_list_get_end(&proc_data->data);
             kv = (opal_value_t*) opal_list_get_next(kv)) {
            if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kvnew, kv, OPAL_VALUE))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            opal_list_append(kvs, &kvnew->super);
        }
        return ORTE_SUCCESS;
    }

    /* see if the key includes a wildcard */
    srchkey = strdup(key);
    if (NULL != (ptr = strchr(srchkey, '*'))) {
        *ptr = '\0';
        len = strlen(srchkey);
    }

    /* otherwise, find all matching keys and return them */
    for (kv = (opal_value_t*) opal_list_get_first(&proc_data->data);
         kv != (opal_value_t*) opal_list_get_end(&proc_data->data);
         kv = (opal_value_t*) opal_list_get_next(kv)) {
        if ((0 < len && 0 == strncmp(srchkey, kv->key, len)) ||
            (0 == len && 0 == strcmp(key, kv->key))) {
            if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kvnew, kv, OPAL_VALUE))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            opal_list_append(kvs, &kvnew->super);
        }
    }
    free(srchkey);
    return ORTE_SUCCESS;
}
Example #27
0
int mca_btl_udapl_put( 
    mca_btl_base_module_t* btl,
    mca_btl_base_endpoint_t* endpoint,
    mca_btl_base_descriptor_t* des)
{
    DAT_RMR_TRIPLET remote_buffer;
    DAT_DTO_COOKIE cookie;
    int rc = OMPI_SUCCESS;
    
    mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)des;
    mca_btl_udapl_segment_t *dst_segment = des->des_dst;

    frag->btl = (mca_btl_udapl_module_t *)btl;
    frag->endpoint = endpoint;
    frag->type = MCA_BTL_UDAPL_PUT;

    if (OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION], -1) < 0) {
        /* no local work queue tokens available */
        OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION], 1);
        OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
        opal_list_append(&endpoint->endpoint_max_frags,
            (opal_list_item_t*)frag);
        OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
        opal_progress();
    } else {
        /* work queue tokens available, try to send  */

        if(OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION], -1) < 0) {
            OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION], 1);
            OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION], 1);
            OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
            opal_list_append(&endpoint->endpoint_max_frags,
                (opal_list_item_t*)frag);
            OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
            opal_progress();
        } else {
            frag->triplet.segment_length = frag->segment.base.seg_len;
        
            remote_buffer.rmr_context = dst_segment->context;
            remote_buffer.target_address =
                (DAT_VADDR)(uintptr_t)dst_segment->base.seg_addr.lval;
            remote_buffer.segment_length = dst_segment->base.seg_len;

            cookie.as_ptr = frag;
            OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
            rc = dat_ep_post_rdma_write(endpoint->endpoint_max,
                1,
                &frag->triplet,
                cookie,
                &remote_buffer,        
                DAT_COMPLETION_DEFAULT_FLAG);
            OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
            if(DAT_SUCCESS != rc) {
                char* major;
                char* minor;

                dat_strerror(rc, (const char**)&major,
                    (const char**)&minor);
                BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_rdma_write",
                    major, minor));
                rc = OMPI_ERROR;
            }
        }
    }
    
    return rc;
}
Example #28
0
/*
 * Find all available RAS components and sort them according to
 * priority
 */
int orte_ras_base_find_available(void)
{
    opal_list_item_t *item;
    mca_base_component_list_item_t *cli;
    orte_ras_base_component_t *component;
    orte_ras_base_module_t *module;
    int priority, rc;
    orte_ras_base_cmp_t *cmp;

    orte_ras_base.ras_available_valid = false;
    
    if (orte_ras_base.ras_opened_valid) {
        OBJ_CONSTRUCT(&orte_ras_base.ras_available, opal_list_t);
        orte_ras_base.ras_available_valid = true;

        for (item = opal_list_get_first(&orte_ras_base.ras_opened);
             opal_list_get_end(&orte_ras_base.ras_opened) != item;
             item = opal_list_get_next(item)) {
            cli = (mca_base_component_list_item_t *) item;
            component = (orte_ras_base_component_t *) cli->cli_component;
            opal_output(orte_ras_base.ras_output,
                        "orte:ras:base:open: querying component %s",
                        component->ras_version.mca_component_name);

            /* Call the component's init function and see if it wants to be
               selected */

            module = component->ras_init(&priority);

            /* If we got a non-NULL module back, then the component wants
               to be considered for selection */

            if (NULL != module) {
                opal_output(orte_ras_base.ras_output,
                            "orte:ras:base:open: component %s returns priority %d",
                            component->ras_version.mca_component_name,
                            priority);

                cmp = OBJ_NEW(orte_ras_base_cmp_t);
                cmp->component = component;
                cmp->module = module;
                cmp->priority = priority;

                opal_list_append(&orte_ras_base.ras_available, &cmp->super);
            } else {
                opal_output(orte_ras_base.ras_output,
                            "orte:ras:base:open: component %s does NOT want to be considered for selection",
                            component->ras_version.mca_component_name);
            }
        }

        /* Sort the resulting available list in priority order */
        opal_list_sort(&orte_ras_base.ras_available, compare);

        /* if we are an HNP, start the receive */
        if (orte_process_info.seed) {
            if (ORTE_SUCCESS  != (rc = orte_ras_base_comm_start())) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        }
    }
    
    return ORTE_SUCCESS;
}
Example #29
0
/*
 * Function for selecting one component from all those that are
 * available.
 */
void orte_ras_base_allocate(int fd, short args, void *cbdata)
{
    int rc;
    orte_job_t *jdata;
    opal_list_t nodes;
    orte_node_t *node;
    orte_std_cntr_t i;
    orte_app_context_t *app;
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;

    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* convenience */
    jdata = caddy->jdata;

    /* if we already did this, don't do it again - the pool of
     * global resources is set. 
     */
    if (orte_ras_base.allocation_read) {
        
        OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                             "%s ras:base:allocate allocation already read",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        goto next_state;
    }
    orte_ras_base.allocation_read = true;

    /* Otherwise, we have to create
     * the initial set of resources that will delineate all
     * further operations serviced by this HNP. This list will
     * contain ALL nodes that can be used by any subsequent job.
     *
     * In other words, if a node isn't found in this step, then
     * no job launched by this HNP will be able to utilize it.
     */
    
    /* construct a list to hold the results */
    OBJ_CONSTRUCT(&nodes, opal_list_t);

    /* if a component was selected, then we know we are in a managed
     * environment.  - the active module will return a list of what it found
     */
    if (NULL != orte_ras_base.active_module)  {
        /* read the allocation */
        if (ORTE_SUCCESS != (rc = orte_ras_base.active_module->allocate(jdata, &nodes))) {
            if (ORTE_ERR_ALLOCATION_PENDING == rc) {
                /* an allocation request is underway, so just do nothing */
                OBJ_DESTRUCT(&nodes);
                OBJ_RELEASE(caddy);
                return;
            }
            if (ORTE_ERR_SYSTEM_WILL_BOOTSTRAP == rc) {
                /* this module indicates that nodes will be discovered
                 * on a bootstrap basis, so all we do here is add our
                 * own node to the list
                 */
                goto addlocal;
            }
            if (ORTE_ERR_TAKE_NEXT_OPTION == rc) {
                /* we have an active module, but it is unable to
                 * allocate anything for this job - this indicates
                 * that it isn't a fatal error, but could be if
                 * an allocation is required
                 */
                if (orte_allocation_required) {
                    /* an allocation is required, so this is fatal */
                    OBJ_DESTRUCT(&nodes);
                    orte_show_help("help-ras-base.txt", "ras-base:no-allocation", true);
                    ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
                    OBJ_RELEASE(caddy);
                    return;
                } else {
                    /* an allocation is not required, so we can just
                     * run on the local node - go add it
                     */
                    goto addlocal;
                }
            }
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
    } 
    /* If something came back, save it and we are done */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        OBJ_DESTRUCT(&nodes);
        /* default to no-oversubscribe-allowed for managed systems */
        if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        }
        /* flag that the allocation is managed */
        orte_managed_allocation = true;
        goto DISPLAY;
    } else if (orte_allocation_required) {
        /* if nothing was found, and an allocation is
         * required, then error out
         */
        OBJ_DESTRUCT(&nodes);
        orte_show_help("help-ras-base.txt", "ras-base:no-allocation", true);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(caddy);
        return;
    }
    
    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate nothing found in module - proceeding to hostfile",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* nothing was found, or no active module was alive. Our next
     * option is to look for a hostfile and assign our global
     * pool from there.
     *
     * Individual hostfile names, if given, are included
     * in the app_contexts for this job. We therefore need to
     * retrieve the app_contexts for the job, and then cycle
     * through them to see if anything is there. The parser will
     * add the nodes found in each hostfile to our list - i.e.,
     * the resulting list contains the UNION of all nodes specified
     * in hostfiles from across all app_contexts
     *
     * We then continue to add any hosts provided by dash-host and
     * the default hostfile, if we have it. We will then filter out
     * all the non-desired hosts (i.e., those not specified by
     * -host and/or -hostfile) when we start the mapping process
     *
     * Note that any relative node syntax found in the hostfiles will
     * generate an error in this scenario, so only non-relative syntax
     * can be present
     */
    if (NULL != orte_default_hostfile) {
        OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                             "%s ras:base:allocate parsing default hostfile %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             orte_default_hostfile));
        
        /* a default hostfile was provided - parse it */
        if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                               orte_default_hostfile))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
    }
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
        if (NULL != app->hostfile) {
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                 "%s ras:base:allocate adding hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 app->hostfile));
            
            /* hostfile was specified - parse it and add it to the list */
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                                   app->hostfile))) {
                ORTE_ERROR_LOG(rc);
                OBJ_DESTRUCT(&nodes);
                /* set an error event */
                ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
                OBJ_RELEASE(caddy);
                return;
            }
        } else if (!orte_soft_locations && NULL != app->dash_host) {
            /* if we are using soft locations, then any dash-host would
             * just include desired nodes and not required. We don't want
             * to pick them up here as this would mean the request was
             * always satisfied - instead, we want to allow the request
             * to fail later on and use whatever nodes are actually
             * available
             */
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                 "%s ras:base:allocate adding dash_hosts",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes,
                                                                    app->dash_host))) {
                ORTE_ERROR_LOG(rc);
                OBJ_DESTRUCT(&nodes);
                ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
                OBJ_RELEASE(caddy);
                return;
            }
        }
    }

    /* if something was found in the hostfile(s), we use that as our global
     * pool - set it and we are done
     */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        /* cleanup */
        OBJ_DESTRUCT(&nodes);
        goto DISPLAY;
    }
    
    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate nothing found in hostfiles - checking for rankfile",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* Our next option is to look for a rankfile - if one was provided, we
     * will use its nodes to create a default allocation pool
     */
    if (NULL != orte_rankfile) {
        /* check the rankfile for node information */
        if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                               orte_rankfile))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return ;
        }
    }
    /* if something was found in rankfile, we use that as our global
     * pool - set it and we are done
     */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        /* rankfile is considered equivalent to an RM allocation */
        if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        }
        /* cleanup */
        OBJ_DESTRUCT(&nodes);
        goto DISPLAY;
    }
    
    
    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate nothing found in rankfile - inserting current node",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
 addlocal:
    /* if nothing was found by any of the above methods, then we have no
     * earthly idea what to do - so just add the local host
     */
    node = OBJ_NEW(orte_node_t);
    if (NULL == node) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        OBJ_DESTRUCT(&nodes);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(caddy);
        return;
    }
    /* use the same name we got in orte_process_info so we avoid confusion in
     * the session directories
     */
    node->name = strdup(orte_process_info.nodename);
    node->state = ORTE_NODE_STATE_UP;
    node->slots_inuse = 0;
    node->slots_max = 0;
    node->slots = 1;
    opal_list_append(&nodes, &node->super);
    
    /* store the results in the global resource pool - this removes the
     * list items
     */
    if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&nodes);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(caddy);
        return;
    }
    OBJ_DESTRUCT(&nodes);

 DISPLAY:
    /* shall we display the results? */
    if (4 < opal_output_get_verbosity(orte_ras_base_framework.framework_output)) {
        orte_ras_base_display_alloc();
    }

 next_state:
    /* are we to report this event? */
    if (orte_report_events) {
        if (ORTE_SUCCESS != (rc = orte_util_comm_report_event(ORTE_COMM_EVENT_ALLOCATE))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
        }
    }
    
    /* set total slots alloc */
    jdata->total_slots_alloc = orte_ras_base.total_slots_alloc;

    /* set the job state to the next position */
    ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOCATION_COMPLETE);

    /* cleanup */
    OBJ_RELEASE(caddy);
}
static int discover(opal_list_t* nodelist, ICluster* pCluster)
{
    int ret = ORTE_ERROR;
    int32_t nodeid;
    orte_node_t *node;
    opal_list_item_t* item;
    opal_list_t new_nodes;
    struct timeval start, stop;

    HRESULT hr = S_OK;  
    long idle_processors = 0;
    IClusterEnumerable* pNodesCollection = NULL;
    IEnumVARIANT* pNodes = NULL;
    INode* pNode = NULL;
    BSTR node_name = NULL, node_arch = NULL;
    VARIANT var;
    NodeStatus Status;
    size_t len;

    /* check for timing request - get start time if so */
    if (orte_timing) {
        gettimeofday(&start, NULL);
    }
    
    /* Get the collection of nodes. */
    hr = pCluster->get_ComputeNodes(&pNodesCollection);
    if (FAILED(hr)) {
        ras_get_cluster_message(pCluster);
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                            "ras:ccp:pCluster->get_ComputeNodes failed."));
        return ORTE_ERROR;
    }

    /* Get the enumerator used to iterate through the collection. */
    hr = pNodesCollection->GetEnumerator(&pNodes);
    if (FAILED(hr)) {
        ras_get_cluster_message(pCluster);
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                            "ras:ccp:pNodesCollection->GetEnumerator failed."));
        return ORTE_ERROR;
    }

    VariantInit(&var);

    /* Construct new node list. */
    OBJ_CONSTRUCT(&new_nodes, opal_list_t);
    nodeid=0;

    /* Loop through the collection. */
    while (hr = pNodes->Next(1, &var, NULL) == S_OK) {
        var.pdispVal->QueryInterface(IID_INode, reinterpret_cast<void **> (&pNode));

        /* Check wether the node is ready. 
         * There are four states:
         *          NodeStatus_Ready = 0,
         *          NodeStatus_Paused = 1,
         *          NodeStatus_Unreachable = 2, probably not a windows cluster node.
         *          NodeStatus_PendingApproval = 3
         */
        hr = pNode->get_Status(&Status);
        if (FAILED(hr)) {
            OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                "ras:ccp:pNode->get_Status failed."));
            ret = ORTE_ERROR;
            goto cleanup;
        }

        /* Get available number of processors on each node. */
        hr = pNode->get_NumberOfIdleProcessors(&idle_processors);
        if (FAILED(hr)) {
            OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                "ras:ccp:pNode->get_NumberOfIdleProcessors failed."));
            ret = ORTE_ERROR;
            goto cleanup;
        }

        /* Do we have enough processors on the available nodes? 
         * Question: How do we get the required number of processors?
         */
        if ( (Status == NodeStatus_Ready) && (idle_processors > 0) ) {

            /* Get node name. */
            hr = pNode->get_Name(&node_name);
            if (FAILED(hr)) {
                OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                    "ras:ccp:pNode->get_Name failed."));
                ret = ORTE_ERROR;
                goto cleanup;
            }

            /* Get node processor architecture. */
            hr = pNode->get_ProcessorArchitecture(&node_arch);
            if (FAILED(hr)) {
                OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                    "ras:ccp:pNode->get_ProcessorArchitecture failed."));
                ret = ORTE_ERROR;
                goto cleanup;
            }

            /* Prevent duplicated nodes in the list*/
            for (item = opal_list_get_first(&new_nodes);
                 opal_list_get_end(&new_nodes) != item;
                 item = opal_list_get_next(item)) {

                node = (orte_node_t*) item;
                if (0 == strcmp(node->name, (char *)node_name)) {
                    ++node->slots;
                    OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                        "ras:ccp:allocate:discover: found -- bumped slots to %d",
                                        node->slots));
                    break;
                }
            }
            /* Did we find it? */

            if (opal_list_get_end(&new_nodes) == item) {

                /* Nope -- didn't find it, so add a new item to the list */

                OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                                    "ras:ccp:allocate:discover: not found -- added to list"));

                node = OBJ_NEW(orte_node_t);

                /* The function _dupenv_s is much safer than getenv on Windows. */
                _dupenv_s(&node->username, &len, "username");

                node->name = _com_util::ConvertBSTRToString(node_name);
                node->launch_id = nodeid;
                node->slots_inuse = 0;
                node->slots_max = 0;
                node->slots = 1;
                opal_list_append(nodelist, &node->super);
            } 
            /* up the nodeid */
            nodeid++;
        }

        pNode->Release();
        VariantClear(&var);
    }

    pNodes->Release();

    if (nodeid > 0) ret = ORTE_SUCCESS;

    /* All done */
cleanup:

    if (ORTE_SUCCESS == ret) {
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                            "ras:ccp:allocate:discover: success"));
    } else {
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                            "ras:ccp:allocate:discover: failed (rc=%d)", ret));
    }

    OBJ_DESTRUCT(&new_nodes);
    SysFreeString(node_name);
    SysFreeString(node_arch);

    /* check for timing request - get stop time and report elapsed time if so */
    if (orte_timing) {
        gettimeofday(&stop, NULL);
        opal_output(0, "ras_ccp: time to allocate is %ld usec",
                    (long int)((stop.tv_sec - start.tv_sec)*1000000 +
                               (stop.tv_usec - start.tv_usec)));
        gettimeofday(&start, NULL);
    }
    
    return ret;
}