/*
 * Given a filename, see if it appears to be of the proper filename
 * format.  If so, save it in the array so that we can process it
 * later.
 */
static int save_filename(const char *filename, lt_ptr data)
{
  size_t len, prefix_len, total_len;
  char *prefix;
  const char *basename;
  component_file_item_t *component_file;
  ltfn_data_holder_t *params = (ltfn_data_holder_t *) data;

  /* Check to see if the file is named what we expect it to be
     named */

  len = sizeof(component_template) + strlen(params->type) + 32;
  if ( 0 < strlen(params->name) ) {
    len += strlen(params->name);
  }
  prefix = (char*)malloc(len);
  snprintf(prefix, len, component_template, params->type);
  prefix_len = strlen(prefix);
  if (0 < strlen(params->name)) {
    strncat(prefix, params->name, len - prefix_len);
  }
  total_len = strlen(prefix);

  basename = strrchr(filename, '/');
  if (NULL == basename) {
    basename = filename;
  } else {
    basename += 1;
  }

  if (0 != strncmp(basename, prefix, total_len)) {
    free(prefix);
    return 0;
  }

  /* Save all the info and put it in the list of found components */

  component_file = OBJ_NEW(component_file_item_t);
  if (NULL == component_file) {
    free(prefix);
    return OPAL_ERR_OUT_OF_RESOURCE;
  }
  strncpy(component_file->type, params->type, MCA_BASE_MAX_TYPE_NAME_LEN);
  component_file->type[MCA_BASE_MAX_TYPE_NAME_LEN] = '\0';
  strncpy(component_file->name, basename + prefix_len,
          MCA_BASE_MAX_COMPONENT_NAME_LEN);
  component_file->name[MCA_BASE_MAX_COMPONENT_NAME_LEN] = '\0';
  strncpy(component_file->basename, basename, OMPI_PATH_MAX);
  component_file->basename[OMPI_PATH_MAX] = '\0';
  strncpy(component_file->filename, filename, OMPI_PATH_MAX);
  component_file->filename[OMPI_PATH_MAX] = '\0';
  component_file->status = UNVISITED;

#if defined(__WINDOWS__) && defined(_DEBUG) 
  /* remove the debug suffix 'd', otherwise we will fail to  
     load the module in later phase. */ 
  component_file->name[strlen(component_file->name)-1] = '\0'; 
#endif

  opal_list_append(&found_files, (opal_list_item_t *) component_file);

  /* All done */

  free(prefix);
  return 0;
}
Exemplo n.º 2
0
static int update_route(orte_process_name_t *target,
                        orte_process_name_t *route)
{
    int i;
    orte_routed_jobfam_t *jfam;
    uint16_t jfamily;

    if (target->jobid == ORTE_JOBID_INVALID ||
            target->vpid == ORTE_VPID_INVALID) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* if I am an application process, we don't update the route since
     * we automatically route everything through the local daemon
     */
    if (ORTE_PROC_IS_APP) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                         "%s routed_radix_update: %s --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(route)));


    /* if I am a daemon and the target is my HNP, then check
     * the route - if it isn't direct, then we just flag that
     * we have a route to the HNP
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target) &&
            OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, route)) {
        hnp_direct = false;
        return ORTE_SUCCESS;
    }

    /* if this is from a different job family, then I need to
     * track how to send messages to it
     */
    if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {

        /* if I am a daemon, then I will automatically route
         * anything to this job family via my HNP - so nothing to do
         * here, just return
         */
        if (ORTE_PROC_IS_DAEMON) {
            return ORTE_SUCCESS;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                             "%s routed_radix_update: diff job family routing job %s --> %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(target->jobid),
                             ORTE_NAME_PRINT(route)));

        /* see if this target is already present */
        jfamily = ORTE_JOB_FAMILY(target->jobid);
        for (i=0; i < orte_routed_jobfams.size; i++) {
            if (NULL == (jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i))) {
                continue;
            }
            if (jfam->job_family == jfamily) {
                OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
                                     "%s routed_radix: updating route to %s via %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_JOB_FAMILY_PRINT(target->jobid),
                                     ORTE_NAME_PRINT(route)));
                jfam->route.jobid = route->jobid;
                jfam->route.vpid = route->vpid;
                return ORTE_SUCCESS;
            }
        }

        /* not there, so add the route FOR THE JOB FAMILY*/
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
                             "%s routed_radix: adding route to %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOB_FAMILY_PRINT(target->jobid)));
        jfam = OBJ_NEW(orte_routed_jobfam_t);
        jfam->job_family = jfamily;
        jfam->route.jobid = route->jobid;
        jfam->route.vpid = route->vpid;
        opal_pointer_array_add(&orte_routed_jobfams, jfam);
        return ORTE_SUCCESS;
    }

    /* THIS CAME FROM OUR OWN JOB FAMILY... */

    opal_output(0, "%s CALL TO UPDATE ROUTE FOR OWN JOB FAMILY", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    return ORTE_ERR_NOT_SUPPORTED;
}
Exemplo n.º 3
0
int orte_util_build_daemon_nidmap(char **nodes)
{
    orte_nid_t *node;
    int i, num_nodes;
    int rc;
    struct hostent *h;
    opal_buffer_t buf;
    orte_process_name_t proc;
    char *uri, *addr;
    char *proc_name;
    
    num_nodes = opal_argv_count(nodes);
    
    OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                         "%s orte:util:build:daemon:nidmap found %d nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_nodes));
    
    if (0 == num_nodes) {
        /* nothing to do */
        return ORTE_SUCCESS;
    }
    
    /* set the size of the nidmap storage so we minimize realloc's */
    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(&orte_nidmap, num_nodes+1))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    
    /* install the entry for the HNP */
    node = OBJ_NEW(orte_nid_t);
    node->name = strdup("HNP");
    node->daemon = 0;
    /* the arch defaults to our arch so that non-hetero
     * case will yield correct behavior
     */
    opal_pointer_array_set_item(&orte_nidmap, 0, node);        
    
    /* the daemon vpids will be assigned in order,
     * starting with vpid=1 for the first node in
     * the list
     */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    proc.jobid = ORTE_PROC_MY_NAME->jobid;
    for (i=0; i < num_nodes; i++) {
        node = OBJ_NEW(orte_nid_t);
        node->name = strdup(nodes[i]);
        node->daemon = i+1;
        /* the arch defaults to our arch so that non-hetero
         * case will yield correct behavior
         */
        opal_pointer_array_set_item(&orte_nidmap, node->daemon, node);        
        
        /* lookup the address of this node */
        if (NULL == (h = gethostbyname(node->name))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        addr = inet_ntoa(*(struct in_addr*)h->h_addr_list[0]);
        
        /* since we are using static ports, all my fellow daemons will be on my
         * port. Setup the contact info for each daemon in my hash tables. Note
         * that this will -not- open a port to those daemons, but will only
         * define the info necessary for opening such a port if/when I communicate
         * to them
         */
        /* construct the URI */
        proc.vpid = node->daemon;
        ORTE_EPOCH_SET(proc.epoch,ORTE_EPOCH_MIN);

        orte_util_convert_process_name_to_string(&proc_name, &proc);
        asprintf(&uri, "%s;tcp://%s:%d", proc_name, addr, (int)orte_process_info.my_port);
        OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                             "%s orte:util:build:daemon:nidmap node %s daemon %d addr %s uri %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             node->name, (int)node->daemon, addr, uri));
        opal_dss.pack(&buf, &uri, 1, OPAL_STRING);
        free(proc_name);
        free(uri);
    }
    
    /* load the hash tables */
    if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(&buf))) {
        ORTE_ERROR_LOG(rc);
    }
    OBJ_DESTRUCT(&buf);

    return rc;
}
static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs,
    int n_procs_in,
    struct ompi_communicator_t *comm,
    char *key,
    void *output_data
    )
{
    /* local variables */
    mca_sbgp_basesmsocket_module_t *module;
    /*
    opal_buffer_t* sbuffer = OBJ_NEW(opal_buffer_t);
    opal_buffer_t* rbuffer = OBJ_NEW(opal_buffer_t);
    */
    opal_paffinity_base_cpu_set_t my_cpu_set;
    bool bound;
    int ret;
    int num_processors;
    int socket_tmp;
    int my_socket_index;
    int core_index=-1;
    int proc, cnt, local, n_local_peers, my_index, my_rank;
    ompi_proc_t* my_proc;
    int *local_ranks_in_comm=NULL;
    int *socket_info=NULL, my_socket_info;
    int  i_cnt, lp_cnt, my_local_index, comm_size=ompi_comm_size(comm);

    /* initialize data */
    output_data=NULL;
    my_rank=ompi_comm_rank(comm);
    my_proc=ompi_comm_peer_lookup(comm,my_rank);
    for( proc=0 ; proc < n_procs_in ; proc++) {
        if( procs[proc]==my_proc){
            my_index=proc;
        }
    }

    /*create a new module*/
    module=OBJ_NEW(mca_sbgp_basesmsocket_module_t);
    if (!module ) {
        return NULL;
    }
    module->super.group_size=0;
    module->super.group_comm = comm;
    module->super.group_list = NULL;
    module->super.group_net = OMPI_SBGP_SOCKET;

/*
    ** get my process affinity information
    ** */

    /* get the number of processors on this node */

    ret=opal_paffinity_base_get_processor_info(&num_processors);

    /* get process affinity mask */
    OPAL_PAFFINITY_CPU_ZERO(my_cpu_set);
    ret=opal_paffinity_base_get(&my_cpu_set);
    OPAL_PAFFINITY_PROCESS_IS_BOUND(my_cpu_set,&bound);

     /*debug process affinity*/
    /*
    {
        ret=opal_paffinity_base_get_socket_info(&num_socket);
        fprintf(stderr,"Number of sockets %d\n",num_socket);
        fprintf(stderr,"Test if rank %d is bound %d\n", my_rank, bound);
        fprintf(stderr,"return from opal_paffinity_base_get: %d\n\n",ret);
        fprintf(stderr,"bitmask elements: ");
        unsigned int long  jj;
        for(jj=0; jj < OPAL_PAFFINITY_BITMASK_NUM_ELEMENTS; jj++)
                 fprintf(stderr," %d ",my_cpu_set.bitmask[jj]);
        fprintf(stderr,"\n");
        fflush(stderr);
    }
    end debug process affinity*/

    if( !bound ) {

        /* pa affinity not set, so socket index will be set to -1 */
        my_socket_index=-1;
        /*debug print*/
        /* */
        fprintf(stderr,"[%d]FAILED to set basesmsocket group !!!\n",my_rank);
        fflush(stderr);
        /*end debug*/
        goto NoLocalPeers;
    } else {

        my_socket_index=-1;
        /* loop over number of processors */
        for ( proc=0 ; proc < num_processors ; proc++ ) {
            if (OPAL_PAFFINITY_CPU_ISSET(proc,my_cpu_set)) {
        ret=opal_paffinity_base_get_map_to_socket_core(proc,&socket_tmp,&core_index);
        if( my_socket_index != socket_tmp ) {
                        my_socket_index=socket_tmp;
                        break;
        }
        }
    } /* end of proc loop */
    }

    /* Debug prints */
       /*
    {
     fprintf(stderr,"Number of processors per node: %d\n",num_processors);
     fprintf(stderr,"I am rank %d and my socket index is %d\n and my core index is %d\n",my_rank,my_socket_index,core_index);
     fprintf(stderr,"n_proc_in = %d\n",n_procs_in);
     fprintf(stderr,"\n");
     fflush(stderr);
    }
       end debug prints */


    /*get my socket index*/
    cnt=0;
    for( proc=0 ; proc < n_procs_in ; proc++) {
    local=OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
    if( local ) {
      cnt++;
    }
    }
     /*debug print */
    /*
    fprintf(stderr,"Number of local processors %d\n",cnt);
    end debug print*/

    /* if no other local procs found skip to end */
    if( 1 >= cnt ) {
      goto NoLocalPeers;
    }


#if 0
    int *local_ranks_in_comm;
    int32_t *socket_info, *my_socket_info;
    int  my_local_index;
#endif
    /* allocate structure to hold the list of local ranks */
    local_ranks_in_comm=(int *)malloc(sizeof(int)*cnt);
    if(NULL == local_ranks_in_comm ){
    goto Error;
    }
    /* figure out which ranks from the input communicator - comm - will
     * particiapte in the local socket determination.
     */

    n_local_peers=0;
    i_cnt=0;
    for( proc = 0; proc < n_procs_in; proc++){
        local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if ( local ) {

            /* set the rank within the on-host ranks - this will be used for tha
             * allgather
             */
            if( my_proc == procs[proc] ) {
                my_local_index=n_local_peers;
            }
            /* find the rank of the current proc in comm.  We take advantage
             * of the fact that ranks in a group have the same relative
             * ordering as they do within the communicator.
             */
#if 1
            /*for( lp_cnt=i_cnt; lp_cnt < comm_size ; lp_cnt++ ) {*/
            for( lp_cnt=proc; lp_cnt < comm_size ; lp_cnt++ ) {
                if(procs[proc] == ompi_comm_peer_lookup(comm,lp_cnt) ){
                    local_ranks_in_comm[i_cnt]=lp_cnt;
                    /* lp_cnt has alrady been checked */
                    i_cnt++;
                    /* found the corresponding rank in comm, so don't need
                     * to search any more */
                    break;
                }
                /*i_cnt++;*/
                /*fprintf(stderr,"QQQ i_cnt %d \n",i_cnt);*/
            }
#endif
            n_local_peers++;
        }
    }
    /*fprintf(stderr,"YYY n_local_peers %d\n",n_local_peers);*/
    socket_info=(int *)malloc(sizeof(int)*n_local_peers);
    /*fprintf(stderr,"XXX got socket info\n");*/
    if(NULL == socket_info ){
    goto Error;
    }

    my_socket_info=my_socket_index;

    /* Allgather data over the communicator */
    ret=comm_allgather_pml(&my_socket_info, socket_info, 1,
            MPI_INT, my_local_index, n_local_peers, local_ranks_in_comm,comm);
    if (OMPI_SUCCESS != ret ) {
        fprintf(stderr," comm_allgather_pml returned error %d \n", ret);
        fflush(stderr);
        return NULL;
    }


    /*allocate memory to the group_list probably an overestimation
      of the necessary resources */
    module->super.group_list=(int *)malloc(sizeof(int)*cnt);
    if(NULL == module->super.group_list){
    goto Error;
    }

    /* figure out who is sharing the same socket */
    cnt=0;
    for (proc = 0; proc < n_local_peers; proc++) {
               int rem_rank=local_ranks_in_comm[proc];
               int rem_socket_index=socket_info[proc];

        /*Populate the list*/
        if (rem_socket_index == my_socket_index) {
                module->super.group_list[cnt]=rem_rank;
                cnt++;
        }
    }

    module->super.group_size=cnt;

    /*debug print*/
    /*
    {
        int ii;
        fprintf(stderr,"Ranks per socket: %d\n",cnt);
        fprintf(stderr,"Socket %d owns ranks: ", my_socket_index);
        for (ii=0; ii < cnt; ii++)
            fprintf(stderr,"%d ",module->super.group_list[ii]);
        fprintf(stderr,"\n");
        fflush(stderr);
    }

    {
        cpu_set_t set;
        unsigned int len = sizeof(set);
        int i;
        unsigned long mask = 0;
        CPU_ZERO(&set);
        if (sched_getaffinity(0, len, &set) < 0) {
            perror("sched_getaffinity");
            return -1;
        }
        for (i = 0; i < CPU_SETSIZE; i++) {
            int cpu = CPU_ISSET(i, &set);
            if (cpu) {
                mask |= 1<< i;
            }
        }
        opal_output(0,"%d: my affinity mask is: %08lx\n", my_local_index,mask);
    }


    end debug*/


    /*Free resources*/
    free(local_ranks_in_comm);
    free(socket_info);

    /*Return the module*/
    return (mca_sbgp_base_module_t *) module;


NoLocalPeers:
    /* nothing to store, so just free the module and return */
    /*fprintf(stderr,"No local socket peers\n");*/
    /*free(module);*/
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm){
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;

Error:
    /*clean up*/
    if( NULL != module->super.group_list){
        free(module->super.group_list);
        module->super.group_list=NULL;
    }
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm){
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;


}
/*
 * Open up all directories in a given path and search for components of
 * the specified type (and possibly of a given name).
 *
 * Note that we use our own path iteration functionality (vs. ltdl's
 * lt_dladdsearchdir() functionality) because we need to look at
 * companion .ompi_info files in the same directory as the library to
 * generate dependencies, etc.  If we use the plain lt_dlopen()
 * functionality, we would not get the directory name of the file
 * finally opened in recursive dependency traversals.
 */
static void find_dyn_components(const char *path, const char *type_name, 
                                const char **names, bool include_mode,
                                opal_list_t *found_components)
{
    int i, len;
    char *path_to_use = NULL, *dir, *end;
    component_file_item_t *file;
    opal_list_item_t *cur;
    char prefix[32 + MCA_BASE_MAX_TYPE_NAME_LEN], *basename;
    
    /* If path is NULL, iterate over the set of directories specified by
       the MCA param mca_base_component_path.  If path is not NULL, then
       use that as the path. */
  
    if (NULL == path) {
        if (NULL != mca_base_component_path) {
            path_to_use = strdup (mca_base_component_path);
        } else {
            /* If there's no path, then there's nothing to search -- we're
               done */
            return;
        }

        if (NULL == path_to_use) {
            /* out of memory */
            return;
        }
    } else {
        path_to_use = strdup(path);
    }
  
    /* If we haven't done so already, iterate over all the files in
       the directories in the path and make a master array of all the
       matching filenames that we find.  Save the filenames in an
       argv-style array.  Re-scan do this if the mca_component_path
       has changed. */
    if (NULL == found_filenames || 
        (NULL != last_path_to_use && 
         0 != strcmp(path_to_use, last_path_to_use))) {
        if (NULL != found_filenames) {
            opal_argv_free(found_filenames);
            found_filenames = NULL;
            free(last_path_to_use);
            last_path_to_use = NULL;
        }
        if (NULL == last_path_to_use) {
            last_path_to_use = strdup(path_to_use);
        }

        dir = path_to_use;
        if (NULL != dir) {
            do {
                end = strchr(dir, OPAL_ENV_SEP);
                if (NULL != end) {
                    *end = '\0';
                }
                if ((0 == strcmp(dir, "USER_DEFAULT") ||
                     0 == strcmp(dir, "USR_DEFAULT"))
                    && NULL != mca_base_user_default_path) {
                    if (0 != lt_dlforeachfile(mca_base_user_default_path,
                                              save_filename, NULL)) {
                        break;
                    }
                } else if (0 == strcmp(dir, "SYS_DEFAULT") ||
                           0 == strcmp(dir, "SYSTEM_DEFAULT")) {
                    if (0 != lt_dlforeachfile(mca_base_system_default_path,
                                              save_filename, NULL)) {
                        break;
                    }                    
                } else {
                    if (0 != lt_dlforeachfile(dir, save_filename, NULL)) {
                        break;
                    }
                }
                dir = end + 1;
            } while (NULL != end);
        }
    }
    
    /* Look through the list of found files and find those that match
       the desired framework name */
    snprintf(prefix, sizeof(prefix) - 1, component_template, type_name);
    len = strlen(prefix);
    OBJ_CONSTRUCT(&found_files, opal_list_t);
    for (i = 0; NULL != found_filenames && NULL != found_filenames[i]; ++i) {
        basename = strrchr(found_filenames[i], '/');
        if (NULL == basename) {
            basename = found_filenames[i];
        } else {
            basename += 1;
        }
        
        if (0 != strncmp(basename, prefix, len)) {
            continue;
        }
        
        /* We found a match; save all the relevant details in the
           found_files list */
        file = OBJ_NEW(component_file_item_t);
        if (NULL == file) {
            return;
        }
        strncpy(file->type, type_name, MCA_BASE_MAX_TYPE_NAME_LEN);
        file->type[MCA_BASE_MAX_TYPE_NAME_LEN] = '\0';
        strncpy(file->name, basename + len, MCA_BASE_MAX_COMPONENT_NAME_LEN);
        file->name[MCA_BASE_MAX_COMPONENT_NAME_LEN] = '\0';
        strncpy(file->basename, basename, OPAL_PATH_MAX);
        file->basename[OPAL_PATH_MAX] = '\0';
        strncpy(file->filename, found_filenames[i], OPAL_PATH_MAX);
        file->filename[OPAL_PATH_MAX] = '\0';
        file->status = UNVISITED;

        opal_list_append(&found_files, (opal_list_item_t *) 
                         file);
    }

    /* Iterate through all the filenames that we found that matched
       the framework we were looking for.  Since one component may
       [try to] call another to be loaded, only try to load the
       UNVISITED files.  Also, ignore the return code -- basically,
       give every file one chance to try to load.  If they load,
       great.  If not, great. */
    for (cur = opal_list_get_first(&found_files); 
         opal_list_get_end(&found_files) != cur;
         cur = opal_list_get_next(cur)) {
        file = (component_file_item_t *) cur;

        if( UNVISITED == file->status ) {
            bool op = true;
            file->status = CHECKING_CYCLE;

            op = use_component(include_mode, names, file->name);
            if( true == op ) {
                open_component(file, found_components);
            }
        }
    }
    
    /* So now we have a final list of loaded components.  We can free all
       the file information. */
    for (cur = opal_list_remove_first(&found_files); 
         NULL != cur;
         cur = opal_list_remove_first(&found_files)) {
        OBJ_RELEASE(cur);
    }
    OBJ_DESTRUCT(&found_files);

    /* All done, now let's cleanup */
    free(path_to_use);
}
Exemplo n.º 6
0
static int show_help(const char *filename, const char *topic,
                     const char *output, orte_process_name_t *sender)
{
    int rc;
    tuple_list_item_t *tli = NULL;
    orte_namelist_t *pnli;
    time_t now = time(NULL);

    /* If we're aggregating, check for duplicates.  Otherwise, don't
       track duplicates at all and always display the message. */
    if (orte_help_want_aggregate) {
        rc = get_tli(filename, topic, &tli);
    } else {
        rc = ORTE_ERR_NOT_FOUND;
    }

    /* If there's no output string (i.e., this is a control message
       asking us to suppress), then skip to the end. */
    if (NULL == output) {
        tli->tli_display = false;
        goto after_output;
    }

    /* Was it already displayed? */
    if (ORTE_SUCCESS == rc) {
        /* Yes.  But do we want to print anything?  That's complicated.

           We always show the first message of a given (filename,
           topic) tuple as soon as it arrives.  But we don't want to
           show duplicate notices often, because we could get overrun
           with them.  So we want to gather them up and say "We got N
           duplicates" every once in a while.

           And keep in mind that at termination, we'll unconditionally
           show all accumulated duplicate notices.

           A simple scheme is as follows:
           - when the first of a (filename, topic) tuple arrives
             - print the message
             - if a timer is not set, set T=now
           - when a duplicate (filename, topic) tuple arrives
             - if now>(T+5) and timer is not set (due to
               non-pre-emptiveness of our libevent, a timer *could* be
               set!)
               - print all accumulated duplicates
               - reset T=now
             - else if a timer was not set, set the timer for T+5
             - else if a timer was set, do nothing (just wait)
           - set T=now when the timer expires
        */           
        ++tli->tli_count_since_last_display;
        if (now > show_help_time_last_displayed + 5 && !show_help_timer_set) {
            show_accumulated_duplicates(0, 0, NULL);
        } else if (!show_help_timer_set) {
            opal_event_evtimer_set(orte_event_base, &show_help_timer_event,
                                   show_accumulated_duplicates, NULL);
            opal_event_evtimer_add(&show_help_timer_event, &show_help_interval);
            show_help_timer_set = true;
        }
    } 
    /* Not already displayed */
    else if (ORTE_ERR_NOT_FOUND == rc) {
        if (orte_xml_output) {
            char *tmp;
            tmp = xml_format((unsigned char*)output);
            fprintf(orte_xml_fp, "%s", tmp);
            fflush(orte_xml_fp);
            free(tmp);
        } else {
            opal_output(orte_clean_output, "%s", output);
        }
        if (!show_help_timer_set) {
            show_help_time_last_displayed = now;
        }
    }
    /* Some other error occurred */
    else {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

 after_output:
    /* If we're aggregating, add this process name to the list */
    if (orte_help_want_aggregate) {
        pnli = OBJ_NEW(orte_namelist_t);
        if (NULL == pnli) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        pnli->name = *sender;
        opal_list_append(&(tli->tli_processes), &(pnli->super));
    }
    return ORTE_SUCCESS;
}
Exemplo n.º 7
0
int orte_show_help_suppress(const char *filename, const char *topic)
{
    int rc = ORTE_SUCCESS;
    int8_t have_output = 0;
    
    if (orte_execute_quiet) {
        return ORTE_SUCCESS;
    }
    
    if (!ready) {
        /* If we are finalizing, then we have no way to process this
           through the orte_show_help system - just drop it. */
        return ORTE_SUCCESS;
    }
    
    /* If we are the HNP, or the RML has not yet been setup, or ROUTED
       has not been setup, or we weren't given an HNP, then all we can
       do is process this locally. */
    if (ORTE_PROC_IS_HNP ||
        NULL == orte_rml.send_buffer_nb ||
        NULL == orte_routed.get_route ||
        NULL == orte_process_info.my_hnp_uri) {
        rc = show_help(filename, topic, NULL, ORTE_PROC_MY_NAME);
    }
    
    /* otherwise, we relay the output message to
     * the HNP for processing
     */
    else {
        opal_buffer_t *buf;
        static bool am_inside = false;

        /* JMS Note that we *may* have a recursion situation here where
           the RML could call show_help.  Need to think about this
           properly, but put a safeguard in here for sure for the time
           being. */
        if (am_inside) {
            rc = show_help(filename, topic, NULL, ORTE_PROC_MY_NAME);
        } else {
            am_inside = true;
        
            /* build the message to the HNP */
            buf = OBJ_NEW(opal_buffer_t);
            /* pack the filename of the show_help text file */
            opal_dss.pack(buf, &filename, 1, OPAL_STRING);
            /* pack the topic tag */
            opal_dss.pack(buf, &topic, 1, OPAL_STRING);
            /* pack the flag that we DO NOT have a string */
            opal_dss.pack(buf, &have_output, 1, OPAL_INT8);
            /* send it to the HNP */
            if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
                                                  ORTE_RML_TAG_SHOW_HELP, 0,
                                                  orte_rml_send_callback, NULL))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(buf);
            }
            am_inside = false;
        }
    }
    
    return ORTE_SUCCESS;
}
Exemplo n.º 8
0
static int rmaps_lama_convert_hwloc_subtree(hwloc_obj_t obj,
                                            opal_tree_item_t *parent_item)
{
    rmaps_lama_max_tree_item_t *max_tree_item = NULL;
    char * key_child_str = NULL;
    char * key_parent_str = NULL;

    while (obj) {
        /*
         * Create new tree item
         */
        max_tree_item = OBJ_NEW(rmaps_lama_max_tree_item_t);

        /*
         * Convert the HWLOC object to the LAMA key
         */
        rmaps_lama_convert_hwloc_key_to_lama_key(obj->type,
                                                 obj->attr->cache.depth,
                                                 &(max_tree_item->type));

        /*
         * Append tree item to parent.  Unless it is the same as the
         * parent (L1 instruction vs data cache).  JJH: Newer versions
         * of hwloc can differentiate from the obj->attr->cache.type.
         */
        if( NULL != obj->parent &&
            obj->parent->type == obj->type &&
            obj->parent->attr->cache.depth == obj->attr->cache.depth ) {
            key_child_str  = lama_type_enum_to_str(max_tree_item->type);
            key_parent_str = lama_type_enum_to_str(((rmaps_lama_max_tree_item_t*)parent_item)->type);
            opal_output_verbose(10, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:lama: Warning: Identical level detected: "
                                "Child [%s] vs Parent [%s]",
                                key_child_str, key_parent_str);
            free(key_child_str);
            free(key_parent_str);

            /*
             * Add descendants if they exist
             */
            if (obj->first_child) {
                rmaps_lama_convert_hwloc_subtree(obj->first_child,
                                                 parent_item);
            }
        } else {
            opal_tree_add_child(parent_item, &max_tree_item->tree_element);

            /*
             * Add descendants if they exist
             */
            if (obj->first_child) {
                rmaps_lama_convert_hwloc_subtree(obj->first_child,
                                                 &max_tree_item->tree_element);
            }
        }

        /*
         * Advance to next sibling
         */
        obj = obj->next_sibling;
    }

    return ORTE_SUCCESS;
}
Exemplo n.º 9
0
static int rmaps_lama_annotate_node_for_mppr(orte_node_t *node, hwloc_obj_t obj)
{
    rmaps_lama_hwloc_user_t *hwloc_userdata = NULL;
    rmaps_lama_node_mppr_t *mppr_accounting = NULL;
    rmaps_lama_level_type_t lama_key;
    opal_hwloc_topo_data_t *opal_hwloc_topo = NULL;
    int i;

    /*
     * Attach our user pointer to the topology, if it is not already there.
     * We will fill it in as needed later.
     *
     * Note: opal/mca/hwloc/base/hwloc_base_util.c attaches their own object
     * to the userdata. There is a pointer in that structure we can use without
     * interfering with what OPAL is trying to do.
     */
    if( NULL == obj->userdata ) {
        /* Some objects may not have topo data associated with them
         * JJH: This is memory leak :/ Fix.
         */
        obj->userdata = (void*)OBJ_NEW(opal_hwloc_topo_data_t);
    }
    if( NULL != obj->userdata ) {
        opal_hwloc_topo = (opal_hwloc_topo_data_t*)(obj->userdata);

        if( NULL == opal_hwloc_topo->userdata ) {
            hwloc_userdata = OBJ_NEW(rmaps_lama_hwloc_user_t);
            opal_hwloc_topo->userdata  = hwloc_userdata;
        } else {
            hwloc_userdata = (rmaps_lama_hwloc_user_t*)(opal_hwloc_topo->userdata);
        }
    }


    /*
     * Add node information if it is not already there
     */
    mppr_accounting = (rmaps_lama_node_mppr_t*)opal_pointer_array_get_item(hwloc_userdata->node_mppr, node->index);
    if( NULL == mppr_accounting ) {
        /*
         * Add MPPR accounting for this node associated with this object
         */
        rmaps_lama_convert_hwloc_key_to_lama_key(obj->type, obj->attr->cache.depth, &lama_key);

        mppr_accounting = (rmaps_lama_node_mppr_t*)malloc(sizeof(rmaps_lama_node_mppr_t));
        mppr_accounting->max = rmaps_lama_get_mppr_for_key(node, lama_key);
        mppr_accounting->cur =  0;

        opal_pointer_array_set_item(hwloc_userdata->node_mppr, node->index, mppr_accounting);
    }


    /*
     * Decend tree
     */
    for(i = 0; i < (int)obj->arity; ++i ) {
        rmaps_lama_annotate_node_for_mppr(node,
                                          obj->children[i]);
    }

    return ORTE_SUCCESS;
}
Exemplo n.º 10
0
static int oshmem_mkey_recv_cb(void)
{
    MPI_Status status;
    int flag;
    int n;
    int rc;
    opal_buffer_t *msg;
    int32_t size;
    void *tmp_buf;
    oob_comm_request_t *r;

    n = 0;
    r = (oob_comm_request_t *)opal_list_get_first(&memheap_oob.req_list);
    assert(r);
    while(r != (oob_comm_request_t *)opal_list_get_end(&memheap_oob.req_list)) {
        my_MPI_Test(&r->recv_req, &flag, &status);
        if (OPAL_LIKELY(0 == flag)) {
            return n;
        }
        PMPI_Get_count(&status, MPI_BYTE, &size);
        MEMHEAP_VERBOSE(5, "OOB request from PE: %d, size %d", status.MPI_SOURCE, size);
        n++;
        opal_list_remove_first(&memheap_oob.req_list);

        /* to avoid deadlock we must start request
         * before processing it. Data are copied to
         * the tmp buffer
         */
        tmp_buf = malloc(size);
        if (NULL == tmp_buf) {
            MEMHEAP_ERROR("not enough memory");
            ORTE_ERROR_LOG(0);
            return n;
        } else {
		    memcpy(tmp_buf, (void*)&r->buf, size);
		    msg = OBJ_NEW(opal_buffer_t);
		    if (NULL == msg) {
		        MEMHEAP_ERROR("not enough memory");
		        ORTE_ERROR_LOG(0);
		        free(tmp_buf);
		        return n;
		    }
		    opal_dss.load(msg, (void*)tmp_buf, size);

            /*
             * send reply before posting the receive request again to limit the recursion size to
             * number of receive requests.
             * send can call opal_progress which calls this function again. If recv req is started
             * stack size will be proportional to number of job ranks.
             */
            do_recv(status.MPI_SOURCE, msg);
            OBJ_RELEASE(msg);
        }

        rc = PMPI_Start(&r->recv_req);
        if (MPI_SUCCESS != rc) {
            MEMHEAP_ERROR("Failed to post recv request %d", rc);
            ORTE_ERROR_LOG(rc);
            return n;
        }
        opal_list_append(&memheap_oob.req_list, &r->super);


        r = (oob_comm_request_t *)opal_list_get_first(&memheap_oob.req_list);
        assert(r);
    }

    return 1;
}
Exemplo n.º 11
0
void mca_memheap_modex_recv_all(void)
{
    int i;
    int j;
    int nprocs, my_pe;
    opal_buffer_t *msg = NULL;
    void *send_buffer = NULL;
    char *rcv_buffer = NULL;
    int size;
    int *rcv_size = NULL;
    int *rcv_n_transports = NULL;
    int *rcv_offsets = NULL;
    int rc = OSHMEM_SUCCESS;
    size_t buffer_size;

    if (!mca_memheap_base_key_exchange) {
        oshmem_shmem_barrier();
        return;
    }

    nprocs = oshmem_num_procs();
    my_pe = oshmem_my_proc_id();

    /* buffer allocation for num_transports
     * message sizes and offsets */

    rcv_size = (int *)malloc(nprocs * sizeof(int));
    if (NULL == rcv_size) {
        MEMHEAP_ERROR("failed to get rcv_size buffer");
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto exit_fatal;
    }

    rcv_offsets = (int *)malloc(nprocs * sizeof(int));
    if (NULL == rcv_offsets) {
        MEMHEAP_ERROR("failed to get rcv_offsets buffer");
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto exit_fatal;
    }

    rcv_n_transports = (int *)malloc(nprocs * sizeof(int));
    if (NULL == rcv_offsets) {
        MEMHEAP_ERROR("failed to get rcv_offsets buffer");
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto exit_fatal;
    }

    /* serialize our own mkeys */
    msg = OBJ_NEW(opal_buffer_t);
    if (NULL == msg) {
        MEMHEAP_ERROR("failed to get msg buffer");
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto exit_fatal;
    }

    for (j = 0; j < memheap_map->n_segments; j++) {
        pack_local_mkeys(msg, 0, j);
    }

    /* we assume here that int32_t returned by opal_dss.unload
     * is equal to size of int we use for MPI_Allgather, MPI_Allgatherv */

    assert(sizeof(int32_t) == sizeof(int));

    /* Do allgather */
    opal_dss.unload(msg, &send_buffer, &size);
    MEMHEAP_VERBOSE(1, "local keys packed into %d bytes, %d segments", size, memheap_map->n_segments);

    /* we need to send num_transports and message sizes separately
     * since message sizes depend on types of btl used */

    rc = oshmem_shmem_allgather(&memheap_map->num_transports, rcv_n_transports, sizeof(int));
    if (MPI_SUCCESS != rc) {
        MEMHEAP_ERROR("allgather failed");
        goto exit_fatal;
    }

    rc = oshmem_shmem_allgather(&size, rcv_size, sizeof(int));
    if (MPI_SUCCESS != rc) {
        MEMHEAP_ERROR("allgather failed");
        goto exit_fatal;
    }

    /* calculating offsets (displacements) for allgatherv */

    rcv_offsets[0] = 0;
    for (i = 1; i < nprocs; i++) {
        rcv_offsets[i] = rcv_offsets[i - 1] + rcv_size[i - 1];
    }

    buffer_size = rcv_offsets[nprocs - 1] + rcv_size[nprocs - 1];

    rcv_buffer = malloc (buffer_size);
    if (NULL == rcv_buffer) {
        MEMHEAP_ERROR("failed to allocate recieve buffer");
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto exit_fatal;
    }

    rc = oshmem_shmem_allgatherv(send_buffer, rcv_buffer, size, rcv_size, rcv_offsets);
    if (MPI_SUCCESS != rc) {
        free (rcv_buffer);
        MEMHEAP_ERROR("allgatherv failed");
        goto exit_fatal;
    }

    opal_dss.load(msg, rcv_buffer, buffer_size);

    /* deserialize mkeys */
    OPAL_THREAD_LOCK(&memheap_oob.lck);
    for (i = 0; i < nprocs; i++) {
        if (i == my_pe) {
            continue;
        }

        msg->unpack_ptr = (void *)((intptr_t) msg->base_ptr + rcv_offsets[i]);

        for (j = 0; j < memheap_map->n_segments; j++) {
            map_segment_t *s;

            s = &memheap_map->mem_segs[j];
            if (NULL != s->mkeys_cache[i]) {
                MEMHEAP_VERBOSE(10, "PE%d: segment%d already exists, mkey will be replaced", i, j);
            } else {
                s->mkeys_cache[i] = (sshmem_mkey_t *) calloc(rcv_n_transports[i],
                        sizeof(sshmem_mkey_t));
                if (NULL == s->mkeys_cache[i]) {
                    MEMHEAP_ERROR("PE%d: segment%d: Failed to allocate mkeys cache entry", i, j);
                    oshmem_shmem_abort(-1);
                }
            }
            memheap_oob.mkeys = s->mkeys_cache[i];
            memheap_oob.segno = j;
            unpack_remote_mkeys(msg, i);
        }
    }

    OPAL_THREAD_UNLOCK(&memheap_oob.lck);

exit_fatal:
    if (rcv_size) {
        free(rcv_size);
    }
    if (rcv_offsets) {
        free(rcv_offsets);
    }
    if (rcv_n_transports) {
        free(rcv_n_transports);
    }
    if (send_buffer) {
        free(send_buffer);
    }
    if (msg) {
        OBJ_RELEASE(msg);
    }

    /* This function requires abort in any error case */
    if (OSHMEM_SUCCESS != rc) {
        oshmem_shmem_abort(rc);
    }
}
Exemplo n.º 12
0
static void do_recv(int source_pe, opal_buffer_t* buffer)
{
    int32_t cnt = 1;
    int rc;
    opal_buffer_t *msg;
    uint8_t msg_type;
    uint32_t seg;

    MEMHEAP_VERBOSE(5, "unpacking %d of %d", cnt, OPAL_UINT8);
    rc = opal_dss.unpack(buffer, &msg_type, &cnt, OPAL_UINT8);
    if (OPAL_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto send_fail;
    }

    switch (msg_type) {
    case MEMHEAP_RKEY_REQ:
        cnt = 1;
        rc = opal_dss.unpack(buffer, &seg, &cnt, OPAL_UINT32);
        if (OPAL_SUCCESS != rc) {
            MEMHEAP_ERROR("bad RKEY_REQ msg");
            goto send_fail;
        }

        MEMHEAP_VERBOSE(5, "*** RKEY REQ");
        msg = OBJ_NEW(opal_buffer_t);
        if (!msg) {
            MEMHEAP_ERROR("failed to get msg buffer");
            ORTE_ERROR_LOG(rc);
            return;
        }

        msg_type = MEMHEAP_RKEY_RESP;
        opal_dss.pack(msg, &msg_type, 1, OPAL_UINT8);

        if (OSHMEM_SUCCESS != pack_local_mkeys(msg, source_pe, seg)) {
            OBJ_RELEASE(msg);
            goto send_fail;
        }

        rc = send_buffer(source_pe, msg);
        if (MPI_SUCCESS != rc) {
            MEMHEAP_ERROR("FAILED to send rml message %d", rc);
            ORTE_ERROR_LOG(rc);
            goto send_fail;
        }
        break;

    case MEMHEAP_RKEY_RESP:
        MEMHEAP_VERBOSE(5, "*** RKEY RESP");
        OPAL_THREAD_LOCK(&memheap_oob.lck);
        unpack_remote_mkeys(buffer, source_pe);
        memheap_oob.mkeys_rcvd = MEMHEAP_RKEY_RESP;
        opal_condition_broadcast(&memheap_oob.cond);
        OPAL_THREAD_UNLOCK(&memheap_oob.lck);
        break;

    case MEMHEAP_RKEY_RESP_FAIL:
        MEMHEAP_VERBOSE(5, "*** RKEY RESP FAIL");
        memheap_oob.mkeys_rcvd = MEMHEAP_RKEY_RESP_FAIL;
        opal_condition_broadcast(&memheap_oob.cond);
        OPAL_THREAD_UNLOCK(&memheap_oob.lck);
        break;

    default:
        MEMHEAP_VERBOSE(5, "Unknown message type %x", msg_type);
        goto send_fail;
    }
    return;

    send_fail: msg = OBJ_NEW(opal_buffer_t);
    if (!msg) {
        MEMHEAP_ERROR("failed to get msg buffer");
        ORTE_ERROR_LOG(rc);
        return;
    }
    msg_type = MEMHEAP_RKEY_RESP_FAIL;
    opal_dss.pack(msg, &msg_type, 1, OPAL_UINT8);

    rc = send_buffer(source_pe, msg);
    if (MPI_SUCCESS != rc) {
        MEMHEAP_ERROR("FAILED to send rml message %d", rc);
        ORTE_ERROR_LOG(rc);
    }

}
Exemplo n.º 13
0
/* linear iexscan
 * working principle:
 * 1. each node (but node 0) receives from left neigbor
 * 2. performs op
 * 3. all but rank p-1 do sends to it's right neigbor and exits
 *
 */
int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                             struct ompi_communicator_t *comm, ompi_request_t ** request,
                             struct mca_coll_base_module_2_2_0_t *module) {
    int rank, p, res;
    ptrdiff_t gap, span;
    NBC_Schedule *schedule;
#ifdef NBC_CACHE_SCHEDULE
    NBC_Scan_args *args, *found, search;
#endif
    char inplace;
    void *tmpbuf = NULL;
    ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

    NBC_IN_PLACE(sendbuf, recvbuf, inplace);

    rank = ompi_comm_rank (comm);
    p = ompi_comm_size (comm);

    span = opal_datatype_span(&datatype->super, count, &gap);
    if (0 < rank) {
        tmpbuf = malloc(span);
        if (NULL == tmpbuf) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        if (inplace) {
            res = NBC_Copy(recvbuf, count, datatype, (char *)tmpbuf-gap, count, datatype, comm);
        } else {
            res = NBC_Copy(sendbuf, count, datatype, (char *)tmpbuf-gap, count, datatype, comm);
        }
        if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
            free(tmpbuf);
            return res;
        }
    }

#ifdef NBC_CACHE_SCHEDULE
    /* search schedule in communicator specific tree */
    search.sendbuf = sendbuf;
    search.recvbuf = recvbuf;
    search.count = count;
    search.datatype = datatype;
    search.op = op;
    found = (NBC_Scan_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], &search);
    if (NULL == found) {
#endif
        schedule = OBJ_NEW(NBC_Schedule);
        if (OPAL_UNLIKELY(NULL == schedule)) {
            free(tmpbuf);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        if (rank != 0) {
            res = NBC_Sched_recv (recvbuf, false, count, datatype, rank-1, schedule, false);

            if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
                OBJ_RELEASE(schedule);
                free(tmpbuf);
                return res;
            }

            if (rank < p - 1) {
                /* we have to wait until we have the data */
                res = NBC_Sched_barrier(schedule);
                if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
                    OBJ_RELEASE(schedule);
                    free(tmpbuf);
                    return res;
                }

                res = NBC_Sched_op (recvbuf, false, (void *)(-gap), true, count,
                                     datatype, op, schedule, true);

                if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
                    OBJ_RELEASE(schedule);
                    free(tmpbuf);
                    return res;
                }

                /* send reduced data onward */
                res = NBC_Sched_send ((void *)(-gap), true, count, datatype, rank + 1, schedule, false);
                if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
                    OBJ_RELEASE(schedule);
                    free(tmpbuf);
                    return res;
                }
            }
        } else if (p > 1) {
            if (inplace) {
              res = NBC_Sched_send (recvbuf, false, count, datatype, 1, schedule, false);
            } else {
              res = NBC_Sched_send (sendbuf, false, count, datatype, 1, schedule, false);
            }
            if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
                OBJ_RELEASE(schedule);
                free(tmpbuf);
                return res;
            }
        }

        res = NBC_Sched_commit(schedule);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
            OBJ_RELEASE(schedule);
            free(tmpbuf);
            return res;
        }

#ifdef NBC_CACHE_SCHEDULE
        /* save schedule to tree */
        args = (NBC_Scan_args *) malloc (sizeof (args));
        if (NULL != args) {
            args->sendbuf = sendbuf;
            args->recvbuf = recvbuf;
            args->count = count;
            args->datatype = datatype;
            args->op = op;
            args->schedule = schedule;
            res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], args, args, 0);
            if (0 == res) {
                OBJ_RETAIN(schedule);

                /* increase number of elements for A2A */
                if (++libnbc_module->NBC_Dict_size[NBC_EXSCAN] > NBC_SCHED_DICT_UPPER) {
                    NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN],
                                             &libnbc_module->NBC_Dict_size[NBC_EXSCAN]);
                }
            } else {
                NBC_Error("error in dict_insert() (%i)", res);
                free (args);
            }
        }
    } else {
        /* found schedule */
        schedule = found->schedule;
        OBJ_RETAIN(schedule);
    }
#endif

    res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
        OBJ_RELEASE(schedule);
        free(tmpbuf);
        return res;
    }

    return OMPI_SUCCESS;
}
Exemplo n.º 14
0
int
main(int argc, char *argv[])
{
    int ret, exit_status = OPAL_SUCCESS;
    int child_pid;
    int prev_pid = 0;
    int idx;
    opal_crs_base_snapshot_t *snapshot = NULL;
    char * tmp_env_var = NULL;
    bool select = false;

    /***************
     * Initialize
     ***************/
    if (OPAL_SUCCESS != (ret = initialize(argc, argv))) {
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Check for existence of the file, or program in the case of self
     */
    if( OPAL_SUCCESS != (ret = check_file() )) {
        opal_show_help("help-opal-restart.txt", "invalid_filename", true,
                       opal_restart_globals.snapshot_ref);
        exit_status = ret;
        goto cleanup;
    }

    /* Re-enable the selection of the CRS component, so we can choose the right one */
    idx = mca_base_var_find(NULL, "crs", "base", "do_not_select");

    if (0 > idx) {
        opal_output(opal_restart_globals.output,
                    "MCA variable opal_crs_base_do_not_select not found\n");
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    ret = mca_base_var_set_value(idx, &select, 0, MCA_BASE_VAR_SOURCE_DEFAULT, NULL);
    if (OPAL_SUCCESS != ret) {
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Make sure we are using the correct checkpointer
     */
    if(NULL == expected_crs_comp) {
        char * full_metadata_path = NULL;
        FILE * metadata = NULL;

        opal_asprintf(&full_metadata_path, "%s/%s/%s",
                 opal_restart_globals.snapshot_loc,
                 opal_restart_globals.snapshot_ref,
                 opal_restart_globals.snapshot_metadata);
        if( NULL == (metadata = fopen(full_metadata_path, "r")) ) {
            opal_show_help("help-opal-restart.txt", "invalid_metadata", true,
                           opal_restart_globals.snapshot_metadata,
                           full_metadata_path);
            exit_status = OPAL_ERROR;
            goto cleanup;
        }
        if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(metadata,
                                                                            &expected_crs_comp,
                                                                            &prev_pid)) ) {
            opal_show_help("help-opal-restart.txt", "invalid_metadata", true,
                           opal_restart_globals.snapshot_metadata,
                           full_metadata_path);
            exit_status = ret;
            goto cleanup;
        }

        free(full_metadata_path);
        full_metadata_path = NULL;

        fclose(metadata);
        metadata = NULL;
    }

    opal_output_verbose(10, opal_restart_globals.output,
                        "Restart Expects checkpointer: (%s)",
                        expected_crs_comp);

    (void) mca_base_var_env_name("crs", &tmp_env_var);
    opal_setenv(tmp_env_var,
                expected_crs_comp,
                true, &environ);
    free(tmp_env_var);
    tmp_env_var = NULL;

    /* Select this component or don't continue.
     * If the selection of this component fails, then we can't
     * restart on this node because it doesn't have the proper checkpointer
     * available.
     */
    if( OPAL_SUCCESS != (ret = opal_crs_base_open(MCA_BASE_OPEN_DEFAULT)) ) {
        opal_show_help("help-opal-restart.txt", "comp_select_failure", true,
                       "crs", ret);
        exit_status = ret;
        goto cleanup;
    }

    if( OPAL_SUCCESS != (ret = opal_crs_base_select()) ) {
        opal_show_help("help-opal-restart.txt", "comp_select_failure", true,
                       expected_crs_comp, ret);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Make sure we have selected the proper component
     */
    if(NULL == expected_crs_comp ||
       0 != strncmp(expected_crs_comp,
                    opal_crs_base_selected_component.base_version.mca_component_name,
                    strlen(expected_crs_comp)) ) {
        opal_show_help("help-opal-restart.txt", "comp_select_mismatch",
                       true,
                       expected_crs_comp,
                       opal_crs_base_selected_component.base_version.mca_component_name,
                       ret);
        exit_status = ret;
        goto cleanup;
    }

    /******************************
     * Restart in this process
     ******************************/
    opal_output_verbose(10, opal_restart_globals.output,
                        "Restarting from file (%s)\n",
                        opal_restart_globals.snapshot_ref);

    snapshot = OBJ_NEW(opal_crs_base_snapshot_t);
    snapshot->cold_start         = true;
    opal_asprintf(&(snapshot->snapshot_directory), "%s/%s",
             opal_restart_globals.snapshot_loc,
             opal_restart_globals.snapshot_ref);
    opal_asprintf(&(snapshot->metadata_filename), "%s/%s",
             snapshot->snapshot_directory,
             opal_restart_globals.snapshot_metadata);

    /* Since some checkpoint/restart systems don't pass along env vars to the
     * restarted app, we need to take care of that.
     *
     * Included here is the creation of any files or directories that need to be
     * created before the process is restarted.
     */
    if(OPAL_SUCCESS != (ret = post_env_vars(prev_pid, snapshot) ) ) {
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Do the actual restart
     */
    ret = opal_crs.crs_restart(snapshot,
                               false,
                               &child_pid);

    if (OPAL_SUCCESS != ret) {
        opal_show_help("help-opal-restart.txt", "restart_cmd_failure", true,
                       opal_restart_globals.snapshot_ref,
                       ret,
                       opal_crs_base_selected_component.base_version.mca_component_name);
        exit_status = ret;
        goto cleanup;
    }
    /* Should never get here, since crs_restart calls exec */

    /***************
     * Cleanup
     ***************/
 cleanup:
    if (OPAL_SUCCESS != (ret = finalize())) {
        return ret;
    }

    if(NULL != snapshot )
        OBJ_DESTRUCT(snapshot);

    return exit_status;
}
/*
 * RML send connect information to remote endpoint
 */
static int send_connect_data(mca_btl_base_endpoint_t* endpoint, 
                             uint8_t message_type)
{
    opal_buffer_t* buffer = OBJ_NEW(opal_buffer_t);
    int rc;
    
    if (NULL == buffer) {
         ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
         return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* pack the info in the send buffer */ 
    BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8));
    rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64));
    rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    if (message_type != ENDPOINT_CONNECT_REQUEST) {
        /* send the QP connect request info we respond to */
        BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
        rc = opal_dss.pack(buffer,
                           &endpoint->rem_info.rem_qps[0].rem_qp_num, 1,
                           OPAL_UINT32);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
        rc = opal_dss.pack(buffer, &endpoint->rem_info.rem_lid, 1, OPAL_UINT16);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    if (message_type != ENDPOINT_CONNECT_ACK) {
        int qp;
        /* stuff all the QP info into the buffer */
        for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { 
            BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
            rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num,
                               1, OPAL_UINT32);
            if (ORTE_SUCCESS != rc) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
            rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1,
                               OPAL_UINT32); 
            if (ORTE_SUCCESS != rc) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        }
        
        BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
        rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
        rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->device->mtu, 1,
                OPAL_UINT32);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
        rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* send to remote endpoint */
    rc = orte_rml.send_buffer_nb(&endpoint->endpoint_proc->proc_guid, 
                                 buffer, OMPI_RML_TAG_OPENIB, 0,
                                 rml_send_cb, NULL);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    BTL_VERBOSE(("Sent QP Info, LID = %d, SUBNET = %016x\n",
                 endpoint->endpoint_btl->lid, 
                 endpoint->subnet_id));

    return OMPI_SUCCESS;
}
Exemplo n.º 16
0
static int query(pid_t pid,
                 opal_pstats_t *stats,
                 opal_node_stats_t *nstats)
{
    char data[4096];
    int fd;
    size_t numchars;
    char *ptr, *eptr;
    int i;
    int len, itime;
    double dtime;
    FILE *fp;
    char *dptr, *value;
    char **fields;
    opal_diskstats_t *ds;
    opal_netstats_t *ns;

    if (NULL != stats) {
        /* record the time of this sample */
        gettimeofday(&stats->sample_time, NULL);
        /* check the nstats - don't do gettimeofday twice
         * as it is expensive
         */
        if (NULL != nstats) {
            nstats->sample_time.tv_sec = stats->sample_time.tv_sec;
            nstats->sample_time.tv_usec = stats->sample_time.tv_usec;
        }
    } else if (NULL != nstats) {
        /* record the time of this sample */
        gettimeofday(&nstats->sample_time, NULL);
    }

    if (NULL != stats) {
        /* create the stat filename for this proc */
        numchars = snprintf(data, sizeof(data), "/proc/%d/stat", pid);
        if (numchars >= sizeof(data)) {
            return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
        }

        if (0 > (fd = open(data, O_RDONLY))) {
            /* can't access this file - most likely, this means we
             * aren't really on a supported system, or the proc no
             * longer exists. Just return an error
             */
            return OPAL_ERR_FILE_OPEN_FAILURE;
        }

        /* absorb all of the file's contents in one gulp - we'll process
         * it once it is in memory for speed
         */
        memset(data, 0, sizeof(data));
        len = read(fd, data, sizeof(data)-1);
        if (len < 0) {
            /* This shouldn't happen! */
            close(fd);
            return OPAL_ERR_FILE_OPEN_FAILURE;
        }
        close(fd);

        /* remove newline at end */
        data[len] = '\0';

        /* the stat file consists of a single line in a carefully formatted
         * form. Parse it field by field as per proc(3) to get the ones we want
         */

        /* we don't need to read the pid from the file - we already know it! */
        stats->pid = pid;

        /* the cmd is surrounded by parentheses - find the start */
        if (NULL == (ptr = strchr(data, '('))) {
            /* no cmd => something wrong with data, return error */
            return OPAL_ERR_BAD_PARAM;
        }
        /* step over the paren */
        ptr++;

        /* find the ending paren */
        if (NULL == (eptr = strchr(ptr, ')'))) {
            /* no end to cmd => something wrong with data, return error */
            return OPAL_ERR_BAD_PARAM;
        }

        /* save the cmd name, up to the limit of the array */
        i = 0;
        while (ptr < eptr && i < OPAL_PSTAT_MAX_STRING_LEN) {
            stats->cmd[i++] = *ptr++;
        }

        /* move to the next field in the data */
        ptr = next_field(eptr, len);

        /* next is the process state - a single character */
        stats->state[0] = *ptr;
        /* move to next field */
        ptr = next_field(ptr, len);

        /* skip fields until we get to the times */
        ptr = next_field(ptr, len); /* ppid */
        ptr = next_field(ptr, len); /* pgrp */
        ptr = next_field(ptr, len); /* session */
        ptr = next_field(ptr, len); /* tty_nr */
        ptr = next_field(ptr, len); /* tpgid */
        ptr = next_field(ptr, len); /* flags */
        ptr = next_field(ptr, len); /* minflt */
        ptr = next_field(ptr, len); /* cminflt */
        ptr = next_field(ptr, len); /* majflt */
        ptr = next_field(ptr, len); /* cmajflt */

        /* grab the process time usage fields */
        itime = strtoul(ptr, &ptr, 10);    /* utime */
        itime += strtoul(ptr, &ptr, 10);   /* add the stime */
        /* convert to time in seconds */
        dtime = (double)itime / (double)HZ;
        stats->time.tv_sec = (int)dtime;
        stats->time.tv_usec = (int)(1000000.0 * (dtime - stats->time.tv_sec));
        /* move to next field */
        ptr = next_field(ptr, len);

        /* skip fields until we get to priority */
        ptr = next_field(ptr, len); /* cutime */
        ptr = next_field(ptr, len); /* cstime */

        /* save the priority */
        stats->priority = strtol(ptr, &ptr, 10);
        /* move to next field */
        ptr = next_field(ptr, len);

        /* skip nice */
        ptr = next_field(ptr, len);

        /* get number of threads */
        stats->num_threads = strtoul(ptr, &ptr, 10);
        /* move to next field */
        ptr = next_field(ptr, len);

        /* skip fields until we get to processor id */
        ptr = next_field(ptr, len);  /* itrealvalue */
        ptr = next_field(ptr, len);  /* starttime */
        ptr = next_field(ptr, len);  /* vsize */
        ptr = next_field(ptr, len);  /* rss */
        ptr = next_field(ptr, len);  /* rss limit */
        ptr = next_field(ptr, len);  /* startcode */
        ptr = next_field(ptr, len);  /* endcode */
        ptr = next_field(ptr, len);  /* startstack */
        ptr = next_field(ptr, len);  /* kstkesp */
        ptr = next_field(ptr, len);  /* kstkeip */
        ptr = next_field(ptr, len);  /* signal */
        ptr = next_field(ptr, len);  /* blocked */
        ptr = next_field(ptr, len);  /* sigignore */
        ptr = next_field(ptr, len);  /* sigcatch */
        ptr = next_field(ptr, len);  /* wchan */
        ptr = next_field(ptr, len);  /* nswap */
        ptr = next_field(ptr, len);  /* cnswap */
        ptr = next_field(ptr, len);  /* exit_signal */

        /* finally - get the processor */
        stats->processor = strtol(ptr, NULL, 10);

        /* that's all we care about from this data - ignore the rest */

        /* now create the status filename for this proc */
        memset(data, 0, sizeof(data));
        numchars = snprintf(data, sizeof(data), "/proc/%d/status", pid);
        if (numchars >= sizeof(data)) {
            return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
        }

        if (NULL == (fp = fopen(data, "r"))) {
            /* ignore this */
            return OPAL_SUCCESS;
        }

        /* parse it according to proc(3) */
        while (NULL != (dptr = local_getline(fp))) {
            if (NULL == (value = local_stripper(dptr))) {
                /* cannot process */
                continue;
            }
            /* look for VmPeak */
            if (0 == strncmp(dptr, "VmPeak", strlen("VmPeak"))) {
                stats->peak_vsize = convert_value(value);
            } else if (0 == strncmp(dptr, "VmSize", strlen("VmSize"))) {
                stats->vsize = convert_value(value);
            } else if (0 == strncmp(dptr, "VmRSS", strlen("VmRSS"))) {
                stats->rss = convert_value(value);
            }
        }
        fclose(fp);

        /* now create the smaps filename for this proc */
        memset(data, 0, sizeof(data));
        numchars = snprintf(data, sizeof(data), "/proc/%d/smaps", pid);
        if (numchars >= sizeof(data)) {
            return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
        }

        if (NULL == (fp = fopen(data, "r"))) {
            /* ignore this */
            return OPAL_SUCCESS;
        }

        /* parse it to find lines that start with "Pss" */
        while (NULL != (dptr = local_getline(fp))) {
            if (NULL == (value = local_stripper(dptr))) {
                /* cannot process */
                continue;
            }
            /* look for Pss */
            if (0 == strncmp(dptr, "Pss", strlen("Pss"))) {
                stats->pss += convert_value(value);
            }
        }
        fclose(fp);
    }

    if (NULL != nstats) {
        /* get the loadavg data */
        if (0 > (fd = open("/proc/loadavg", O_RDONLY))) {
            /* not an error if we don't find this one as it
             * isn't critical
             */
            goto diskstats;
        }

        /* absorb all of the file's contents in one gulp - we'll process
         * it once it is in memory for speed
         */
        memset(data, 0, sizeof(data));
        len = read(fd, data, sizeof(data)-1);
        close(fd);
        if (len < 0) {
            goto diskstats;
        }

        /* remove newline at end */
        data[len] = '\0';

        /* we only care about the first three numbers */
        nstats->la = strtof(data, &ptr);
        nstats->la5 = strtof(ptr, &eptr);
        nstats->la15 = strtof(eptr, NULL);

        /* see if we can open the meminfo file */
        if (NULL == (fp = fopen("/proc/meminfo", "r"))) {
            /* ignore this */
            goto diskstats;
        }

        /* read the file one line at a time */
        while (NULL != (dptr = local_getline(fp))) {
            if (NULL == (value = local_stripper(dptr))) {
                /* cannot process */
                continue;
            }
            if (0 == strcmp(dptr, "MemTotal")) {
                nstats->total_mem = convert_value(value);
            } else if (0 == strcmp(dptr, "MemFree")) {
                nstats->free_mem = convert_value(value);
            } else if (0 == strcmp(dptr, "Buffers")) {
                nstats->buffers = convert_value(value);
            } else if (0 == strcmp(dptr, "Cached")) {
                nstats->cached = convert_value(value);
            } else if (0 == strcmp(dptr, "SwapCached")) {
                nstats->swap_cached = convert_value(value);
            } else if (0 == strcmp(dptr, "SwapTotal")) {
                nstats->swap_total = convert_value(value);
            } else if (0 == strcmp(dptr, "SwapFree")) {
                nstats->swap_free = convert_value(value);
            } else if (0 == strcmp(dptr, "Mapped")) {
                nstats->mapped = convert_value(value);
            }
        }
        fclose(fp);

    diskstats:
        /* look for the diskstats file */
        if (NULL == (fp = fopen("/proc/diskstats", "r"))) {
            /* not an error if we don't find this one as it
             * isn't critical
             */
            goto netstats;
        }
        /* read the file one line at a time */
        while (NULL != (dptr = local_getline(fp))) {
            /* look for the local disks */
            if (NULL == strstr(dptr, "sd")) {
                continue;
            }
            /* parse to extract the fields */
            fields = NULL;
            local_getfields(dptr, &fields);
            if (NULL == fields) {
                continue;
            }
            if (14 < opal_argv_count(fields)) {
                opal_argv_free(fields);
                continue;
            }
            /* pack the ones of interest into the struct */
            ds = OBJ_NEW(opal_diskstats_t);
            ds->disk = strdup(fields[2]);
            ds->num_reads_completed = strtoul(fields[3], NULL, 10);
            ds->num_reads_merged = strtoul(fields[4], NULL, 10);
            ds->num_sectors_read = strtoul(fields[5], NULL, 10);
            ds->milliseconds_reading = strtoul(fields[6], NULL, 10);
            ds->num_writes_completed = strtoul(fields[7], NULL, 10);
            ds->num_writes_merged = strtoul(fields[8], NULL, 10);
            ds->num_sectors_written = strtoul(fields[9], NULL, 10);
            ds->milliseconds_writing = strtoul(fields[10], NULL, 10);
            ds->num_ios_in_progress = strtoul(fields[11], NULL, 10);
            ds->milliseconds_io = strtoul(fields[12], NULL, 10);
            ds->weighted_milliseconds_io = strtoul(fields[13], NULL, 10);
            opal_list_append(&nstats->diskstats, &ds->super);
            opal_argv_free(fields);
        }
        fclose(fp);

    netstats:
        /* look for the netstats file */
        if (NULL == (fp = fopen("/proc/net/dev", "r"))) {
            /* not an error if we don't find this one as it
             * isn't critical
             */
            goto complete;
        }
        /* skip the first two lines as they are headers */
        local_getline(fp);
        local_getline(fp);
        /* read the file one line at a time */
        while (NULL != (dptr = local_getline(fp))) {
            /* the interface is at the start of the line */
            if (NULL == (ptr = strchr(dptr, ':'))) {
                continue;
            }
            *ptr = '\0';
            ptr++;
            /* parse to extract the fields */
            fields = NULL;
            local_getfields(ptr, &fields);
            if (NULL == fields) {
                continue;
            }
            /* pack the ones of interest into the struct */
            ns = OBJ_NEW(opal_netstats_t);
            ns->net_interface = strdup(dptr);
            ns->num_bytes_recvd = strtoul(fields[0], NULL, 10);
            ns->num_packets_recvd = strtoul(fields[1], NULL, 10);
            ns->num_recv_errs = strtoul(fields[2], NULL, 10);
            ns->num_bytes_sent = strtoul(fields[8], NULL, 10);
            ns->num_packets_sent = strtoul(fields[9], NULL, 10);
            ns->num_send_errs = strtoul(fields[10], NULL, 10);
            opal_list_append(&nstats->netstats, &ns->super);
            opal_argv_free(fields);
        }
        fclose(fp);
    }

 complete:
    return OPAL_SUCCESS;
}
Exemplo n.º 17
0
static int mca_oob_ud_process_messages (struct ibv_cq *event_cq, mca_oob_ud_port_t *port)
{
    mca_oob_ud_msg_item_t *msg_item, *next_item;
    opal_list_t *processing_msgs = &mca_oob_ud_component.ud_event_processing_msgs;
    mca_oob_ud_peer_t *peer;
    mca_oob_ud_msg_hdr_t *msg_hdr;
    int msg_num, i, count;
    struct ibv_wc wc[40];
    bool peer_nacked;

    count = ibv_poll_cq (event_cq, 40, wc);
    if (count < 0)
        return count;

    /* acknowlege the events */
    ibv_ack_cq_events (event_cq, count);

    for (i = 0 ; i < count ; ++i) {
        msg_num = (int)(wc[i].wr_id & (~MCA_OOB_UD_RECV_WR));
        msg_hdr = (mca_oob_ud_msg_hdr_t *) (port->msg_buf.ptr + msg_num * port->mtu);

        VALGRIND_MAKE_MEM_DEFINED(msg_hdr, wc[i].byte_len);

        if (!(wc[i].wr_id & MCA_OOB_UD_RECV_WR) || IBV_WC_SUCCESS != wc[i].status) {
            mca_oob_ud_port_post_one_recv (port, msg_num);
            continue;
        }

        peer = mca_oob_ud_get_peer (port, &msg_hdr->ra.name, wc[i].src_qp, msg_hdr->ra.qkey,
                                    wc[i].slid, msg_hdr->ra.port_num);

        if (peer) {
            if (MCA_OOB_UD_MSG_ACK != msg_hdr->msg_type && MCA_OOB_UD_MSG_NACK != msg_hdr->msg_type &&
                MCA_OOB_UD_MSG_END != msg_hdr->msg_type) {
                mca_oob_ud_msg_item_t *msg_item = OBJ_NEW(mca_oob_ud_msg_item_t);

                msg_item->msg_num = msg_num;
                msg_item->hdr     = msg_hdr;
                msg_item->port    = port;
                msg_item->peer    = peer;

                opal_list_append (processing_msgs, (opal_list_item_t *) msg_item);
            } else {
                if (MCA_OOB_UD_MSG_ACK == msg_hdr->msg_type) {
                    (void) mca_oob_ud_event_handle_ack (port, peer, msg_hdr);
                } else if (MCA_OOB_UD_MSG_NACK == msg_hdr->msg_type) {
                    (void) mca_oob_ud_event_handle_nack (port, peer, msg_hdr);
                } else {
                    mca_oob_ud_event_handle_end (peer, msg_hdr);
                }

                mca_oob_ud_port_post_one_recv (port, msg_num);
            }
        } else {
            OPAL_OUTPUT_VERBOSE((10, mca_oob_base_output, "%s oob:ud:process_message got a null peer for message id %"
                                 PRIu64, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr->msg_id));
            mca_oob_ud_port_post_one_recv (port, msg_num);
        }
    }

    /* Sort messages by peer then id */
    opal_list_sort (processing_msgs, mca_oob_ud_msg_item_cmp);

    /* Send ACKs/NACKs and throw away out-of-order messages */
    msg_item = (mca_oob_ud_msg_item_t *) mca_oob_ud_list_get_first (processing_msgs);

    for (peer = NULL, peer_nacked = false ; NULL != msg_item ; msg_item = next_item) {
        if (peer != msg_item->peer) {
            peer_nacked = false;
        }

        peer = msg_item->peer;

        next_item = (mca_oob_ud_msg_item_t *) mca_oob_ud_list_get_next (processing_msgs,
                                                                        (opal_list_item_t *)msg_item);

        if (false == peer_nacked) {
            if (msg_item->hdr->msg_id > peer->peer_expected_id) {
                (void) mca_oob_ud_event_send_nack (msg_item->port, peer, msg_item->hdr);
                peer_nacked = true;
            } else if (NULL == next_item || (next_item->peer != msg_item->peer)) {
                (void) mca_oob_ud_event_send_ack (msg_item->port, msg_item->peer, msg_item->hdr);
            }
        }

        if (msg_item->hdr->msg_id != peer->peer_expected_id) {
            opal_list_remove_item (processing_msgs, (opal_list_item_t *) msg_item);
            OBJ_RELEASE(msg_item);
        } else {
            peer->peer_expected_id++;
        }
    }

    /* Process remaining messages */
    while (NULL !=
           (msg_item = (mca_oob_ud_msg_item_t *) opal_list_remove_first (processing_msgs))) {
        switch (msg_item->hdr->msg_type) {
        case MCA_OOB_UD_MSG_REQUEST:
            mca_oob_ud_event_handle_req (port, msg_item->peer, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_REPLY:
            mca_oob_ud_event_handle_rep (port, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_COMPLETE:
            mca_oob_ud_event_handle_completion (port, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_DATA_OK:
            mca_oob_ud_event_handle_data_ok (port, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_END:
            mca_oob_ud_event_handle_end (peer, msg_item->hdr);
            break;
        default:
            /* do nothing */
            break;
        }

        OBJ_RELEASE(msg_item);
    }

    return count;
}
Exemplo n.º 18
0
/***************  MODEX SECTION **************/
void orte_grpcomm_base_modex(int fd, short args, void *cbdata)
{
    orte_grpcomm_caddy_t *caddy = (orte_grpcomm_caddy_t*)cbdata;
    orte_grpcomm_collective_t *modex = caddy->op;
    int rc;
    orte_namelist_t *nm;
    opal_list_item_t *item;
    bool found;
    orte_grpcomm_collective_t *cptr;
    opal_scope_t scope;

    OBJ_RELEASE(caddy);

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:modex: performing modex",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* if we are a singleton and routing isn't enabled,
     * then we have nobody with which to communicate, so
     * we can just declare success
     */
    if ((orte_process_info.proc_type & ORTE_PROC_SINGLETON) &&
        !orte_routing_is_enabled) {
        if (NULL != modex->cbfunc) {
            OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
                                 "%s CALLING MODEX RELEASE",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            modex->cbfunc(NULL, modex->cbdata);
        }
        /* flag the collective as complete */
        modex->active = false;
        return;
    }

    if (0 == opal_list_get_size(&modex->participants)) {
        /* record the collective */
        modex->next_cbdata = modex;
        opal_list_append(&orte_grpcomm_base.active_colls, &modex->super);

        /* put our process name in the buffer so it can be unpacked later */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* this is between our peers, so only collect info marked for them */
        scope = OPAL_SCOPE_PEER;

        /* add a wildcard name to the participants so the daemon knows
         * the jobid that is involved in this collective
         */
        nm = OBJ_NEW(orte_namelist_t);
        nm->name.jobid = ORTE_PROC_MY_NAME->jobid;
        nm->name.vpid = ORTE_VPID_WILDCARD;
        opal_list_append(&modex->participants, &nm->super);
        modex->next_cb = orte_grpcomm_base_store_modex;
    } else {
        /* see if the collective is already present - a race condition
         * exists where other participants may have already sent us their
         * contribution. This would place the collective on the global
         * array, but leave it marked as "inactive" until we call
         * modex with the list of participants
         */
        found = false;
        for (item = opal_list_get_first(&orte_grpcomm_base.active_colls);
             item != opal_list_get_end(&orte_grpcomm_base.active_colls);
             item = opal_list_get_next(item)) {
            cptr = (orte_grpcomm_collective_t*)item;
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s CHECKING COLL id %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 cptr->id));
            
            if (modex->id == cptr->id) {
                found = true;
                /* remove the old entry - we will replace it
                 * with the modex one
                 */
                opal_list_remove_item(&orte_grpcomm_base.active_colls, item);
                break;
            }
        }
        if (found) {
            /* since it already exists, the list of
             * targets contains the list of procs
             * that have already sent us their info. Cycle
             * thru the targets and move those entries to
             * the modex object
             */
            while (NULL != (item = opal_list_remove_first(&cptr->targets))) {
                opal_list_append(&modex->targets, item);
            }
            /* copy the previously-saved data across */
            opal_dss.copy_payload(&modex->local_bucket, &cptr->local_bucket);
            /* cleanup */
            OBJ_RELEASE(cptr);
        }
        /* now add the modex to the global list of active collectives */
        modex->next_cb = orte_grpcomm_base_store_modex;
        modex->next_cbdata = modex;
        opal_list_append(&orte_grpcomm_base.active_colls, &modex->super);

        /* pack the collective id */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, &modex->id, 1, ORTE_GRPCOMM_COLL_ID_T))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* pack our name */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&modex->buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* this is not amongst our peers, but rather between a select
         * group of processes - e.g., during a connect/accept operation.
         * Thus, we need to include the non-peer info as well as our peers
         * since we can't tell what the other participants may already have
         */
        scope = OPAL_SCOPE_GLOBAL;

    }

    /* pack the requested entries */
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_pack_modex_entries(&modex->buffer, scope))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    
    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:full:modex: executing allgather",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* execute the allgather */
    if (ORTE_SUCCESS != (rc = orte_grpcomm.allgather(modex))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:modex: modex posted",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    return;

 cleanup:
    return;
}
Exemplo n.º 19
0
int orte_show_help_norender(const char *filename, const char *topic, 
                            bool want_error_header, const char *output)
{
    int rc = ORTE_SUCCESS;
    int8_t have_output = 1;

    if (!ready) {
        /* if we are finalizing, then we have no way to process
         * this through the orte_show_help system - just drop it to
         * stderr; that's at least better than not showing it.
         *
         * If we are not finalizing, then this is probably a show_help
         * stemming from either a cmd-line request to display the usage
         * message, or a show_help related to a user error. In either case,
         * we can't do anything but just print to stderr.
         */
        fprintf(stderr, "%s", output);
        goto CLEANUP;
    }
    
    /* if we are the HNP, or the RML has not yet been setup,
     * or ROUTED has not been setup,
     * or we weren't given an HNP, then all we can do
     * is process this locally
     */
    if (ORTE_PROC_IS_HNP ||
        NULL == orte_rml.send_buffer_nb ||
        NULL == orte_routed.get_route ||
        NULL == orte_process_info.my_hnp_uri) {
        rc = show_help(filename, topic, output, ORTE_PROC_MY_NAME);
    }
    
    /* otherwise, we relay the output message to
     * the HNP for processing
     */
    else {
        opal_buffer_t *buf;
        static bool am_inside = false;

        /* JMS Note that we *may* have a recursion situation here where
           the RML could call show_help.  Need to think about this
           properly, but put a safeguard in here for sure for the time
           being. */
        if (am_inside) {
            rc = show_help(filename, topic, output, ORTE_PROC_MY_NAME);
        } else {
            am_inside = true;
        
            /* build the message to the HNP */
            buf = OBJ_NEW(opal_buffer_t);
            /* pack the filename of the show_help text file */
            opal_dss.pack(buf, &filename, 1, OPAL_STRING);
            /* pack the topic tag */
            opal_dss.pack(buf, &topic, 1, OPAL_STRING);
            /* pack the flag that we have a string */
            opal_dss.pack(buf, &have_output, 1, OPAL_INT8);
            /* pack the resulting string */
            opal_dss.pack(buf, &output, 1, OPAL_STRING);
            /* send it to the HNP */
            if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
                                                  ORTE_RML_TAG_SHOW_HELP, 0,
                                                  orte_rml_send_callback, NULL))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(buf);
            } else {
                rc = ORTE_SUCCESS;
            }
            am_inside = false;
        }
    }
    
CLEANUP:
    return rc;
}
Exemplo n.º 20
0
int orcm_init(orcm_proc_type_t flags)
{
    int ret;
    char *error;
    int i, spin;

    if (NULL != getenv("ORCM_MCA_spin")) {
        spin = 1;
        /* spin until a debugger can attach */
        while (0 != spin) {
            ret = 0;
            while (ret < 10000) {
                ret++;
            };
        }
    }
    
    if (!orcm_util_initialized) {
        orcm_init_util();
    }
    
    /* set the default leader policy */
    orcm_default_leader_policy.jobid = ORTE_JOBID_WILDCARD;
    orcm_default_leader_policy.vpid = ORTE_VPID_WILDCARD;

    /* get the number of max msgs */
    mca_base_param_reg_int_name("orcm", "max_buffered_msgs",
                                "Number of recvd messages to hold in storage from each source",
                                false, false, ORCM_MAX_MSG_RING_SIZE, &orcm_max_msg_ring_size);

    /* independent mode or not */
    mca_base_param_reg_int_name("orcm", "sched_kill_dvm",
                                "Whether or not scheduler kills associated daemons upon termination (default: no)",
                                false, false, (int)false, &ret);
    orcm_sched_kill_dvm = OPAL_INT_TO_BOOL(ret);

    /* setup the globals that require initialization */
    orcm_triplets = OBJ_NEW(orcm_triplets_array_t);

#ifdef HAVE_QSYSTEM_H
#ifdef Q_SYSTEM_INTFCS_TO_PROBE_FOR_IP_ADDRESS
{
    char *eth_ifs[] = Q_SYSTEM_INTFCS_TO_PROBE_FOR_IP_ADDRESS;
    char **adds=NULL, *ifs, *envar;
    int i, num_ifs;

    num_ifs = sizeof(eth_ifs) / sizeof(eth_ifs[0]);
    for (i=0; i < num_ifs; i++) {
        opal_argv_append_nosize(&adds, eth_ifs[i]);
    }
    ifs = opal_argv_join(adds, ',');
    opal_argv_free(adds);
    /* push it into the environ so that the rmcast framework can get it */
    asprintf(&envar, "OMPI_MCA_rmcast_base_if_include=%s", ifs);
    putenv(envar);
    /* cannot release envar as the environ doesn't keep its own copy */
    free(ifs);
}
#endif
#endif

    /* initialize us */
    if (ORTE_SUCCESS != (ret = orte_init(NULL, NULL, flags))) {
        error = "orte_init";
        goto error;
    }

    if (!ORCM_PROC_IS_TOOL) {
        opal_set_using_threads(true);
    }

    if (!ORCM_PROC_IS_APP) {
        trap_signals();
    }

    orcm_initialized = true;
    
    return ORCM_SUCCESS;

error:
    if (ORCM_ERR_SILENT != ret) {
        orte_show_help("help-openrcm-runtime.txt",
                       "orcm_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    
    return ret;
}
static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs,
    int n_procs_in,
    struct ompi_communicator_t *comm,
    char *key,
    void *output_data
    )
{
    /* local variables */
    mca_sbgp_basesmsocket_module_t *module;
    int ret;
    int my_socket_index;
    int proc, cnt, local, n_local_peers, my_index, my_rank;
    ompi_proc_t* my_proc;
    int *local_ranks_in_comm=NULL;
    int *socket_info=NULL, my_socket_info;
    int  i_cnt, lp_cnt, my_local_index = -1, comm_size=ompi_comm_size(comm);

    /* initialize data */
    output_data=NULL;
    my_rank=ompi_comm_rank(comm);
    my_proc=ompi_comm_peer_lookup(comm,my_rank);
    for( proc=0 ; proc < n_procs_in ; proc++) {
        if( procs[proc]==my_proc){
            my_index=proc;
        }
    }

    /*create a new module*/
    module=OBJ_NEW(mca_sbgp_basesmsocket_module_t);
    if (!module ) {
        return NULL;
    }
    module->super.group_size=0;
    module->super.group_comm = comm;
    module->super.group_list = NULL;
    module->super.group_net = OMPI_SBGP_SOCKET;

    /* test to see if process is bound */
    if( OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) ) {

        /* pa affinity not set, so socket index will be set to -1 */
        my_socket_index=-1;
        /*debug print*/
        /* */
        BASESMSOCKET_VERBOSE(10, ("[%d] FAILED to set basesmsocket group, processes are not bound!!!\n",my_rank));
        /*end debug*/
        goto NoLocalPeers;
    } else {

        my_socket_index=-1;
        /* this should find my logical socket id which is the socket id we want 
         * physical socket ids are not necessarily unique, logical ones, as defined
         * by the hwloc API are unique. 
         */
        if( OMPI_SUCCESS != mca_sbgp_map_to_logical_socket_id(&my_socket_index)){
            BASESMSOCKET_VERBOSE(10, ("[%d] FAILED to set basesmsocket group !!!\n",my_rank));

            goto NoLocalPeers;
        }
    }

    /* Debug prints */
    /*
       {
       fprintf(stderr,"Number of processors per node: %d\n",num_processors);
       fprintf(stderr,"I am rank %d and my socket index is %d\n and my core index is %d\n",my_rank,my_socket_index,core_index);
       fprintf(stderr,"n_proc_in = %d\n",n_procs_in);
       fprintf(stderr,"\n");
       fflush(stderr);
       }
       end debug prints */


    /*get my socket index*/
    cnt=0;
    for( proc=0 ; proc < n_procs_in ; proc++) {
        local=OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if( local ) {
            cnt++;
        }
    }
     /*debug print */
    /*
    fprintf(stderr,"Number of local processors %d\n",cnt);
    end debug print*/

    /* if no other local procs found skip to end */
    if( 1 >= cnt ) {
      goto NoLocalPeers;
    }



    /* allocate structure to hold the list of local ranks */
    local_ranks_in_comm=(int *)malloc(sizeof(int)*cnt);
    if(NULL == local_ranks_in_comm ){
        goto Error;
    }
    /* figure out which ranks from the input communicator - comm - will
     * particiapte in the local socket determination.
     */

    n_local_peers=0;
    i_cnt=0;
    for( proc = 0; proc < n_procs_in; proc++){
        local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if ( local ) {

            /* set the rank within the on-host ranks - this will be used for tha
             * allgather
             */
            if( my_proc == procs[proc] ) {
                my_local_index=n_local_peers;
            }
            /* find the rank of the current proc in comm.  We take advantage
             * of the fact that ranks in a group have the same relative
             * ordering as they do within the communicator.
             */
            for( lp_cnt=proc; lp_cnt < comm_size ; lp_cnt++ ) {
                if(procs[proc] == ompi_comm_peer_lookup(comm,lp_cnt) ){
                    local_ranks_in_comm[i_cnt]=lp_cnt;
                    /* lp_cnt has alrady been checked */
                    i_cnt++;
                    /* found the corresponding rank in comm, so don't need
                     * to search any more */
                    break;
                }
                /*i_cnt++;*/
                /*fprintf(stderr,"QQQ i_cnt %d \n",i_cnt);*/
            }
            n_local_peers++;
        }
        }
        /*fprintf(stderr,"YYY n_local_peers %d\n",n_local_peers);*/
        socket_info=(int *)malloc(sizeof(int)*n_local_peers);
        /*fprintf(stderr,"XXX got socket info\n");*/
        if(NULL == socket_info ){
            goto Error;
        }

        my_socket_info=my_socket_index;

        /* Allgather data over the communicator */
        ret=comm_allgather_pml(&my_socket_info, socket_info, 1,
                MPI_INT, my_local_index, n_local_peers, local_ranks_in_comm,comm);
        if (OMPI_SUCCESS != ret ) {
            BASESMSOCKET_VERBOSE(10, ("comm_allgather_pml returned error %d\n",ret));
            return NULL;
        }


        /*allocate memory to the group_list probably an overestimation
          of the necessary resources */
        module->super.group_list=(int *)malloc(sizeof(int)*cnt);
        if(NULL == module->super.group_list){
            goto Error;
        }

        /* figure out who is sharing the same socket */
        cnt=0;
        for (proc = 0; proc < n_local_peers; proc++) {
            int rem_rank=local_ranks_in_comm[proc];
            int rem_socket_index=socket_info[proc];

            /*Populate the list*/
            if (rem_socket_index == my_socket_index) {
                module->super.group_list[cnt]=rem_rank;
                cnt++;
            }
        }

        module->super.group_size=cnt;

#if 0
    /*debug print*/
    
    {
        int ii;
        fprintf(stderr,"Ranks per socket: %d\n",cnt);
        fprintf(stderr,"Socket %d owns ranks: ", my_socket_index);
        for (ii=0; ii < cnt; ii++)
            fprintf(stderr,"%d ",module->super.group_list[ii]);
        fprintf(stderr,"\n");
        fflush(stderr);
    }
#endif

   /* end debug*/


    /*Free resources*/
    free(local_ranks_in_comm);
    free(socket_info);

    /*Return the module*/
    return (mca_sbgp_base_module_t *) module;


NoLocalPeers:
    /* nothing to store, so just free the module and return */
    /*fprintf(stderr,"No local socket peers\n");*/
    /*free(module);*/
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm){
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;

Error:
    /*clean up*/
    if( NULL != module->super.group_list){
        free(module->super.group_list);
        module->super.group_list=NULL;
    }
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm){
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;


}
Exemplo n.º 22
0
static void _send_notification(int status,
                               orte_proc_state_t state,
                               orte_process_name_t *proc,
                               orte_process_name_t *target)
{
    opal_buffer_t *buf;
    orte_grpcomm_signature_t sig;
    int rc;
    opal_value_t kv, *kvptr;
    orte_process_name_t daemon;

    buf = OBJ_NEW(opal_buffer_t);

    opal_output_verbose(5, orte_state_base_framework.framework_output,
                        "%s state:hnp:sending notification %s proc %s target %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_ERROR_NAME(status),
                        ORTE_NAME_PRINT(proc),
                        ORTE_NAME_PRINT(target));

    /* pack the status */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &status, 1, OPAL_INT))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return;
    }

    /* the source is the proc */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, proc, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return;
    }

    if (OPAL_ERR_PROC_ABORTED == status) {
        /* we will pass three opal_value_t's */
        rc = 3;
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rc, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(buf);
            return;
        }
        /* pass along the affected proc(s) */
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_PMIX_EVENT_AFFECTED_PROC);
        kv.type = OPAL_NAME;
        kv.data.name.jobid = proc->jobid;
        kv.data.name.vpid = proc->vpid;
        kvptr = &kv;
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &kvptr, 1, OPAL_VALUE))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&kv);
            OBJ_RELEASE(buf);
            return;
        }
        OBJ_DESTRUCT(&kv);
    } else {
        /* we are going to pass two opal_value_t's */
        rc = 2;
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rc, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(buf);
            return;
        }
    }

    /* pass along the affected proc(s) */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_EVENT_AFFECTED_PROC);
    kv.type = OPAL_NAME;
    kv.data.name.jobid = proc->jobid;
    kv.data.name.vpid = proc->vpid;
    kvptr = &kv;
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &kvptr, 1, OPAL_VALUE))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        OBJ_RELEASE(buf);
        return;
    }
    OBJ_DESTRUCT(&kv);

    /* pass along the proc(s) to be notified */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_EVENT_CUSTOM_RANGE);
    kv.type = OPAL_NAME;
    kv.data.name.jobid = target->jobid;
    kv.data.name.vpid = target->vpid;
    kvptr = &kv;
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &kvptr, 1, OPAL_VALUE))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        OBJ_RELEASE(buf);
        return;
    }
    OBJ_DESTRUCT(&kv);

    /* if the targets are a wildcard, then xcast it to everyone */
    if (ORTE_VPID_WILDCARD == target->vpid) {
        OBJ_CONSTRUCT(&sig, orte_grpcomm_signature_t);
        sig.signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
        sig.signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
        sig.signature[0].vpid = ORTE_VPID_WILDCARD;
        sig.sz = 1;

        if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(&sig, ORTE_RML_TAG_NOTIFICATION, buf))) {
            ORTE_ERROR_LOG(rc);
        }
        OBJ_DESTRUCT(&sig);
        OBJ_RELEASE(buf);
    } else {
        /* get the daemon hosting the proc to be notified */
        daemon.jobid = ORTE_PROC_MY_NAME->jobid;
        daemon.vpid = orte_get_proc_daemon_vpid(target);
        /* send the notification to that daemon */
        opal_output_verbose(5, orte_state_base_framework.framework_output,
                            "%s state:base:sending notification %s to proc %s at daemon %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_ERROR_NAME(status),
                            ORTE_NAME_PRINT(target),
                            ORTE_NAME_PRINT(&daemon));
        if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&daemon, buf,
                                                          ORTE_RML_TAG_NOTIFICATION,
                                                          orte_rml_send_callback, NULL))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(buf);
        }
    }
}
/*
 * Function to find as many components of a given type as possible.  This
 * includes statically-linked in components as well as opening up a
 * directory and looking for shared-library MCA components of the
 * appropriate type (load them if available).
 *
 * Return one consolidated array of (mca_base_component_t*) pointing to all
 * available components.
 */
int mca_base_component_find(const char *directory, const char *type, 
                            const mca_base_component_t *static_components[], 
                            const char *requested_components,
                            opal_list_t *found_components,
                            bool open_dso_components)
{
    char **requested_component_names = NULL;
    mca_base_component_list_item_t *cli;
    bool include_mode;
    int i, ret;

    ret = parse_requested (requested_components, &include_mode,
                           &requested_component_names);
    if (OPAL_SUCCESS != ret) {
        return ret;
    }

    /* Find all the components that were statically linked in */
    OBJ_CONSTRUCT(found_components, opal_list_t);
    for (i = 0; NULL != static_components &&
             NULL != static_components[i]; ++i) {
        if ( use_component(include_mode,
                           (const char**)requested_component_names,
                           static_components[i]->mca_component_name) ) {
            cli = OBJ_NEW(mca_base_component_list_item_t);
            if (NULL == cli) {
                ret = OPAL_ERR_OUT_OF_RESOURCE;
                goto component_find_out;
            }
            cli->cli_component = static_components[i];
            opal_list_append(found_components, (opal_list_item_t *) cli);
        }
    }

#if OPAL_WANT_LIBLTDL
    /* Find any available dynamic components in the specified directory */
    if (open_dso_components && !mca_base_component_disable_dlopen) {
        find_dyn_components(directory, type,
                            (const char**)requested_component_names,
                            include_mode, found_components); 
    } else {
        opal_output_verbose(40, 0, 
                            "mca: base: component_find: dso loading for %s MCA components disabled", 
                            type);
    }
#endif

    if (include_mode) {
        ret = component_find_check (type, requested_component_names, found_components);
    } else {
        ret = OPAL_SUCCESS;
    }


    ret = OPAL_SUCCESS;

component_find_out:

    if (NULL != requested_component_names) {
        opal_argv_free(requested_component_names);
    }

    /* All done */

    return ret;
}
Exemplo n.º 24
0
static int orcmd_init(void)
{
    int ret = ORTE_ERROR;
    char *error = NULL;
    opal_buffer_t buf, *clusterbuf, *uribuf;
    orte_job_t *jdata;
    orte_node_t *node;
    orte_proc_t *proc;
    opal_list_t config;
    orcm_scheduler_t *scheduler;
    orcm_node_t *mynode=NULL;
    int32_t n;

    if (initialized) {
        return ORCM_SUCCESS;
    }
    initialized = true;

    /* Initialize the ORTE data type support */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_std_prolog";
        goto error;
    }

    /* setup the global job and node arrays */
    orte_job_data = OBJ_NEW(opal_pointer_array_t);
    if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data,
                                                       1,
                                                       ORTE_GLOBAL_ARRAY_MAX_SIZE,
                                                       1))) {
        ORTE_ERROR_LOG(ret);
        error = "setup job array";
        goto error;
    }

    orte_node_pool = OBJ_NEW(opal_pointer_array_t);
    if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool,
                                                       ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
                                                       ORTE_GLOBAL_ARRAY_MAX_SIZE,
                                                       ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
        ORTE_ERROR_LOG(ret);
        error = "setup node array";
        goto error;
    }
    orte_node_topologies = OBJ_NEW(opal_pointer_array_t);
    if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies,
                                                       ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
                                                       ORTE_GLOBAL_ARRAY_MAX_SIZE,
                                                       ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
        ORTE_ERROR_LOG(ret);
        error = "setup node topologies array";
        goto error;
    }

    /* create a job tracker for the daemons */
    jdata = OBJ_NEW(orte_job_t);
    jdata->jobid = 0;
    ORTE_PROC_MY_NAME->jobid = 0;
    opal_pointer_array_set_item(orte_job_data, 0, jdata);

    /* read the site configuration */
    OBJ_CONSTRUCT(&config, opal_list_t);
    if (ORCM_SUCCESS != (ret = orcm_cfgi.read_config(&config))) {
        error = "getting config";
        goto error;
    }

    /* define the cluster and collect contact info for all
     * aggregators - we'll need to know how to talk to any
     * of them in case of failures
     */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    if (ORCM_SUCCESS != (ret = orcm_cfgi.define_system(&config,
                                                       &mynode,
                                                       &orte_process_info.num_procs,
                                                       &buf))) {
        OBJ_DESTRUCT(&buf);
        error = "define system";
        goto error;
    }

    /* if my name didn't get set, then we didn't find our node
     * in the config - report it and die
     */
    if (NULL == mynode) {
        orte_show_help("help-ess-orcm.txt", "node-not-found", true,
                       orcm_cfgi_base.config_file,
                       orte_process_info.nodename);
        OBJ_DESTRUCT(&buf);
        return ORTE_ERR_SILENT;
    }

    /* define a node and proc object for ourselves as some parts
     * of ORTE and ORCM require it */
    if (NULL == (node = OBJ_NEW(orte_node_t))) {
        ret = ORTE_ERR_OUT_OF_RESOURCE;
        error = "out of memory";
        goto error;
    }
    node->name = strdup(orte_process_info.nodename);
    opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node);
    if (NULL == (proc = OBJ_NEW(orte_proc_t))) {
        ret = ORTE_ERR_OUT_OF_RESOURCE;
        error = "out of memory";
        goto error;
    }
    proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
    proc->name.vpid = ORTE_PROC_MY_NAME->vpid;
    OBJ_RETAIN(proc);
    node->daemon = proc;
    OBJ_RETAIN(node);
    proc->node = node;
    opal_pointer_array_set_item(jdata->procs, ORTE_PROC_MY_NAME->vpid, proc);

    /* For now, we only support a single scheduler daemon in the system.
     * This *may* change someday in the future */
    scheduler = (orcm_scheduler_t*)opal_list_get_first(orcm_schedulers);

    /* If we are in test mode, then we don't *require* that a scheduler
     * be defined in the system - otherwise, we do */
    if (NULL == scheduler) {
        if (mca_sst_orcmd_component.scheduler_reqd) {
            error = "no scheduler found";
            ret = ORTE_ERR_NOT_FOUND;
            goto error;
        }
    } else {
        ORTE_PROC_MY_SCHEDULER->jobid = scheduler->controller.daemon.jobid;
        ORTE_PROC_MY_SCHEDULER->vpid = scheduler->controller.daemon.vpid;
    }

    /* register the ORTE-level params at this time now that the
     * config has had a chance to push things into the environ
     */
    if (ORTE_SUCCESS != (ret = orte_register_params())) {
        OBJ_DESTRUCT(&buf);
        error = "orte_register_params";
        goto error;
    }

    /* setup callback for SIGPIPE */
    setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback);
    /* Set signal handlers to catch kill signals so we can properly clean up
     * after ourselves.
     */
    setup_sighandler(SIGTERM, &term_handler, shutdown_signal);
    setup_sighandler(SIGINT, &int_handler, shutdown_signal);

    /** setup callbacks for signals we should ignore */
    setup_sighandler(SIGUSR1, &sigusr1_handler, signal_callback);
    setup_sighandler(SIGUSR2, &sigusr2_handler, signal_callback);
    signals_set = true;

#if OPAL_HAVE_HWLOC
    {
        hwloc_obj_t obj;
        unsigned i, j;

        /* get the local topology */
        if (NULL == opal_hwloc_topology) {
            if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) {
                OBJ_DESTRUCT(&buf);
                error = "topology discovery";
                goto error;
            }
        }

        /* remove the hostname from the topology. Unfortunately, hwloc
         * decided to add the source hostname to the "topology", thus
         * rendering it unusable as a pure topological description. So
         * we remove that information here.
         */
        obj = hwloc_get_root_obj(opal_hwloc_topology);
        for (i=0; i < obj->infos_count; i++) {
            if (NULL == obj->infos[i].name ||
                NULL == obj->infos[i].value) {
                continue;
            }
            if (0 == strncmp(obj->infos[i].name, "HostName", strlen("HostName"))) {
                free(obj->infos[i].name);
                free(obj->infos[i].value);
                /* left justify the array */
                for (j=i; j < obj->infos_count-1; j++) {
                    obj->infos[j] = obj->infos[j+1];
                }
                obj->infos[obj->infos_count-1].name = NULL;
                obj->infos[obj->infos_count-1].value = NULL;
                obj->infos_count--;
                break;
            }
        }

        if (15 < opal_output_get_verbosity(orcm_sst_base_framework.framework_output)) {
            opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO);
        }

        /* if we were asked to bind to specific core(s), do so now */
        if (NULL != orte_daemon_cores) {
            char **cores=NULL, tmp[128];
            hwloc_obj_t pu;
            hwloc_cpuset_t ours, pucpus, res;
            int core;

            /* could be a collection of comma-delimited ranges, so
             * use our handy utility to parse it
             */
            orte_util_parse_range_options(orte_daemon_cores, &cores);
            if (NULL != cores) {
                ours = hwloc_bitmap_alloc();
                hwloc_bitmap_zero(ours);
                pucpus = hwloc_bitmap_alloc();
                res = hwloc_bitmap_alloc();
                for (i=0; NULL != cores[i]; i++) {
                    core = strtoul(cores[i], NULL, 10);
                    if (NULL == (pu = opal_hwloc_base_get_pu(opal_hwloc_topology, core, OPAL_HWLOC_LOGICAL))) {
                        orte_show_help("help-orted.txt", "orted:cannot-bind",
                                       true, orte_process_info.nodename,
                                       orte_daemon_cores);
                        ret = ORTE_ERR_NOT_SUPPORTED;
                        OBJ_DESTRUCT(&buf);
                        error = "cannot bind";
                        goto error;
                    }
                    hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
                    hwloc_bitmap_or(res, ours, pucpus);
                    hwloc_bitmap_copy(ours, res);
                }
                /* if the result is all zeros, then don't bind */
                if (!hwloc_bitmap_iszero(ours)) {
                    (void)hwloc_set_cpubind(opal_hwloc_topology, ours, 0);
                    if (opal_hwloc_report_bindings) {
                        opal_hwloc_base_cset2mapstr(tmp, sizeof(tmp), opal_hwloc_topology, ours);
                        opal_output(0, "Daemon %s is bound to cores %s",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
                    }
                }
                /* cleanup */
                hwloc_bitmap_free(ours);
                hwloc_bitmap_free(pucpus);
                hwloc_bitmap_free(res);
                opal_argv_free(cores);
            }
        }
    }
#endif

    /* open and select the pstat framework */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_pstat_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "opal_pstat_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = opal_pstat_base_select())) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "opal_pstat_base_select";
        goto error;
    }

    /* open and setup the state machine */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_state_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_state_base_select";
        goto error;
    }

    /* open the notifier */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_notifier_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_notifier_base_open";
        goto error;
    }

    /* open the errmgr */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_errmgr_base_open";
        goto error;
    }

    /* Setup the communication infrastructure */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_oob_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_oob_base_select";
        goto error;
    }
    if (!opal_list_get_size(&orte_oob_base.actives)) {
        ret = ORTE_ERROR;
        error = "orte_oob: Found 0 active transports";
        goto error;
    }

    /* Runtime Messaging Layer */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_rml_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_rml_base_select";
        goto error;
    }

    /* select the notifier*/
    if (ORTE_SUCCESS != (ret = orte_notifier_base_select())) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_notifier_base_select";
        goto error;
    }

    /* select the errmgr */
    if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_errmgr_base_select";
        goto error;
    }

    /* Routed system */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_rml_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orte_routed_base_select";
        goto error;
    }

    /* database */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_db_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orcm_db_base_open";
        goto error;
    }
    /* always restrict daemons to local database components */
    if (ORTE_SUCCESS != (ret = orcm_db_base_select())) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "orcm_db_base_select";
        goto error;
    }

    /* datastore - ensure we don't pickup the pmi component, but
     * don't override anything set by user
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"dstore")) {
        putenv(OPAL_MCA_PREFIX"dstore=^pmi");
    }
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_dstore_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "opal_dstore_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = opal_dstore_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "opal_dstore_base_select";
        goto error;
    }
    /* create the handle */
    if (0 > (opal_dstore_internal = opal_dstore.open("INTERNAL", NULL, NULL))) {
        error = "opal dstore internal";
        ret = ORTE_ERR_FATAL;
        goto error;
    }

    /* extract the cluster description and setup the routed info - the orcm routed component
     * will know what to do. */
    n = 1;
    if (OPAL_SUCCESS != (ret = opal_dss.unpack(&buf, &clusterbuf, &n, OPAL_BUFFER))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "extract cluster buf";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, clusterbuf))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        OBJ_RELEASE(clusterbuf);
        error = "orte_routed.init_routes";
        goto error;
    }
    OBJ_RELEASE(clusterbuf);

    /* extract the uri buffer and load the hash tables */
    n = 1;
    if (OPAL_SUCCESS != (ret = opal_dss.unpack(&buf, &uribuf, &n, OPAL_BUFFER))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        error = "extract uri buffer";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_rml_base_update_contact_info(uribuf))) {
        ORTE_ERROR_LOG(ret);
        OBJ_DESTRUCT(&buf);
        OBJ_RELEASE(uribuf);
        error = "load hash tables";
        goto error;
    }
    OBJ_DESTRUCT(&buf);
    OBJ_RELEASE(uribuf);

    /*
     * Group communications
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_grpcomm_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_grpcomm_base_select";
        goto error;
    }

    /* Open/select the odls */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_odls_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_odls_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_odls_base_select";
        goto error;
    }

    /* enable communication with the rml */
    if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml.enable_comm";
        goto error;
    }

    /* setup the FileM */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_filem_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_filem_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_filem_base_select";
        goto error;
    }

    /*
     * Initalize the CR setup
     * Note: Always do this, even in non-FT builds.
     * If we don't some user level tools may hang.
     */
    opal_cr_set_enabled(false);
    if (ORTE_SUCCESS != (ret = orte_cr_init())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_cr_init";
        goto error;
    }

    /* setup the ANALYTICS framework */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_analytics_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_analytics_base_open";
        goto error;
    }

    /* setup the EVGEN framework */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_evgen_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_evgen_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orcm_evgen_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_evgen_select";
        goto error;
    }

    /* setup the SENSOR framework */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_sensor_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_sensor_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orcm_sensor_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_sensor_select";
        goto error;
    }
    /* start the local sensors */
    orcm_sensor.start(ORTE_PROC_MY_NAME->jobid);

    /* setup the PWRMGMT framework */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_pwrmgmt_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_pwrmgmt_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orcm_pwrmgmt_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_pwrmgmt_select";
        goto error;
    }

    /* setup the DFS framework */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_dfs_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_dfs_select";
        goto error;
    }

    /* open and setup the DIAG framework */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orcm_diag_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_diag_base_open";
        goto error;
    }
    if (ORCM_SUCCESS != (ret = orcm_diag_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orcm_diag_select";
        goto error;
    }

    return ORTE_SUCCESS;
    
 error:
    orte_show_help("help-orcm-runtime.txt",
                   "orcm_init:startup:internal-failure",
                   true, error, ORTE_ERROR_NAME(ret), ret);
    
    return ORTE_ERR_SILENT;
}
Exemplo n.º 25
0
static int rte_init(void)
{
    int ret;
    char *error = NULL;
    char *envar, *ev1, *ev2;
    uint64_t unique_key[2];
    char *string_key;
    opal_value_t *kv;
    char *val;
    int u32, *u32ptr;
    uint16_t u16, *u16ptr;
    char **peers=NULL, *mycpuset, **cpusets=NULL;
    opal_process_name_t name;
    size_t i;

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto error;
    }

    /* get an async event base - we use the opal_async one so
     * we don't startup extra threads if not needed */
    orte_event_base = opal_progress_thread_init(NULL);
    progress_thread_running = true;

    /* open and setup pmix */
    if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    /* set the event base */
    opal_pmix_base_set_evbase(orte_event_base);
    /* initialize the selected module */
    if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
        /* we cannot run */
        error = "pmix init";
        goto error;
    }
    u32ptr = &u32;
    u16ptr = &u16;

    /****   THE FOLLOWING ARE REQUIRED VALUES   ***/
    /* pmix.init set our process name down in the OPAL layer,
     * so carry it forward here */
    ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
    ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;

    /* get our local rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting local rank";
        goto error;
    }
    orte_process_info.my_local_rank = u16;

    /* get our node rank from PMI */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK,
                          ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        error = "getting node rank";
        goto error;
    }
    orte_process_info.my_node_rank = u16;

    /* get universe size */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_UNIV_SIZE,
                          ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS != ret) {
        error = "getting univ size";
        goto error;
    }
    orte_process_info.num_procs = u32;
    /* push into the environ for pickup in MPI layer for
     * MPI-3 required info key
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) {
        asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", orte_process_info.num_procs);
        putenv(ev1);
        added_num_procs = true;
    }
    if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) {
        asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs);
        putenv(ev2);
        added_app_ctx = true;
    }


    /* get our app number from PMI - ok if not found */
    OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_APPNUM,
                                   ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.app_num = u32;
    } else {
        orte_process_info.app_num = 0;
    }

    /* get the number of local peers - required for wireup of
     * shared memory BTL */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_SIZE,
                          ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
    if (OPAL_SUCCESS == ret) {
        orte_process_info.num_local_peers = u32 - 1;  // want number besides ourselves
    } else {
        orte_process_info.num_local_peers = 0;
    }

    /* setup transport keys in case the MPI layer needs them -
     * we can use the jobfam and stepid as unique keys
     * because they are unique values assigned by the RM
     */
    if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) {
        unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid);
        unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid);
        if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key);
        putenv(envar);
        added_transport_keys = true;
        /* cannot free the envar as that messes up our environ */
        free(string_key);
    }

    /* retrieve our topology */
    val = NULL;
    OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO,
                                   ORTE_PROC_MY_NAME, &val, OPAL_STRING);
    if (OPAL_SUCCESS == ret && NULL != val) {
        /* load the topology */
        if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
            ret = OPAL_ERROR;
            free(val);
            error = "setting topology";
            goto error;
        }
        if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) {
            ret = OPAL_ERROR;
            free(val);
            hwloc_topology_destroy(opal_hwloc_topology);
            error = "setting topology";
            goto error;
        }
        /* since we are loading this from an external source, we have to
         * explicitly set a flag so hwloc sets things up correctly
         */
        if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
                                         (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
                                          HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                          HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
            ret = OPAL_ERROR;
            hwloc_topology_destroy(opal_hwloc_topology);
            free(val);
            error = "setting topology";
            goto error;
        }
        /* now load the topology */
        if (0 != hwloc_topology_load(opal_hwloc_topology)) {
            ret = OPAL_ERROR;
            hwloc_topology_destroy(opal_hwloc_topology);
            free(val);
            error = "setting topology";
            goto error;
        }
        free(val);
        /* filter the cpus thru any default cpu set */
        if (OPAL_SUCCESS != (ret = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) {
            error = "filtering topology";
            goto error;
        }
    } else {
        /* it wasn't passed down to us, so go get it */
        if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) {
            error = "topology discovery";
            goto error;
        }
        /* push it into the PMIx database in case someone
         * tries to retrieve it so we avoid an attempt to
         * get it again */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
        kv->type = OPAL_STRING;
        if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) {
            error = "topology export";
            goto error;
        }
        if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) {
            error = "topology store";
            goto error;
        }
        OBJ_RELEASE(kv);
    }

    /* get our local peers */
    if (0 < orte_process_info.num_local_peers) {
        /* if my local rank if too high, then that's an error */
        if (orte_process_info.num_local_peers < orte_process_info.my_local_rank) {
            ret = ORTE_ERR_BAD_PARAM;
            error = "num local peers";
            goto error;
        }
        /* retrieve the local peers */
        OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS,
                              ORTE_PROC_MY_NAME, &val, OPAL_STRING);
        if (OPAL_SUCCESS == ret && NULL != val) {
            peers = opal_argv_split(val, ',');
            free(val);
            /* and their cpusets, if available */
            OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_CPUSETS, ORTE_PROC_MY_NAME, &val, OPAL_STRING);
            if (OPAL_SUCCESS == ret && NULL != val) {
                cpusets = opal_argv_split(val, ':');
                free(val);
            } else {
                cpusets = NULL;
            }
        } else {
            peers = NULL;
            cpusets = NULL;
        }
    } else {
        peers = NULL;
        cpusets = NULL;
    }

    /* set the locality */
    if (NULL != peers) {
        /* indentify our cpuset */
        if (NULL != cpusets) {
            mycpuset = cpusets[orte_process_info.my_local_rank];
        } else {
            mycpuset = NULL;
        }
        name.jobid = ORTE_PROC_MY_NAME->jobid;
        for (i=0; NULL != peers[i]; i++) {
            kv = OBJ_NEW(opal_value_t);
            kv->key = strdup(OPAL_PMIX_LOCALITY);
            kv->type = OPAL_UINT16;
            name.vpid = strtoul(peers[i], NULL, 10);
            if (name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* we are fully local to ourselves */
                u16 = OPAL_PROC_ALL_LOCAL;
            } else if (NULL == mycpuset || NULL == cpusets[i] ||
                       0 == strcmp(cpusets[i], "UNBOUND")) {
                /* all we can say is that it shares our node */
                u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
            } else {
                /* we have it, so compute the locality */
                u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]);
            }
            OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
                                 "%s ess:pmi:locality: proc %s locality %x",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&name), u16));
            kv->data.uint16 = u16;
            ret = opal_pmix.store_local(&name, kv);
            if (OPAL_SUCCESS != ret) {
                error = "local store of locality";
                opal_argv_free(peers);
                opal_argv_free(cpusets);
                goto error;
            }
            OBJ_RELEASE(kv);
        }
        opal_argv_free(peers);
        opal_argv_free(cpusets);
    }

    /* now that we have all required info, complete the setup */
    if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_app_setup";
        goto error;
    }

    /* setup process binding */
    if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) {
        error = "proc_binding";
        goto error;
    }

    /* this needs to be set to enable debugger use when direct launched */
    if (NULL == orte_process_info.my_daemon_uri) {
        orte_standalone_operation = true;
    }

    /* set max procs */
    if (orte_process_info.max_procs < orte_process_info.num_procs) {
        orte_process_info.max_procs = orte_process_info.num_procs;
    }

    /***  PUSH DATA FOR OTHERS TO FIND   ***/

    /* push our hostname so others can find us, if they need to */
    OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
    if (ORTE_SUCCESS != ret) {
        error = "db store hostname";
        goto error;
    }

    /* if we are an ORTE app - and not an MPI app - then
     * we need to exchange our connection info here.
     * MPI_Init has its own modex, so we don't need to do
     * two of them. However, if we don't do a modex at all,
     * then processes have no way to communicate
     *
     * NOTE: only do this when the process originally launches.
     * Cannot do this on a restart as the rest of the processes
     * in the job won't be executing this step, so we would hang
     */
    if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
        opal_pmix.fence(NULL, 0);
    }

    return ORTE_SUCCESS;

 error:
    if (!progress_thread_running) {
        /* can't send the help message, so ensure it
         * comes out locally
         */
        orte_show_help_finalize();
    }
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    return ret;
}
Exemplo n.º 26
0
static int __create_fca_comm(mca_coll_fca_module_t *fca_module)
{
    int rc, ret;
    int result = MPI_UNEQUAL;
    mca_coll_fca_c_cache_item_t *c_item = NULL, *c_item_new = NULL;
    mca_coll_fca_component_t *cm = &mca_coll_fca_component;
    int comm_size = ompi_comm_size(fca_module->comm);
    ompi_communicator_t *comm = fca_module->comm;
    opal_list_t *c_cache;
    struct timeval start, end, seq_start, seq_end, par_start, par_end;
    int act_deep;


    if(mca_coll_fca_component.fca_verbose == 10) {
        gettimeofday(&start, NULL);
    }

    if(mca_coll_fca_component.fca_enable_cache) {

        c_cache = &cm->c_cache;

        if(mca_coll_fca_component.fca_enable_hash){

            int  grank = ORTE_PROC_MY_NAME->vpid;
            int hash_index, part_of_hash_index;

            if(mca_coll_fca_component.fca_parallel_hash_calc == 1) {

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&par_start, NULL);
                }

                part_of_hash_index = modular_pow(cm->fca_primes[grank % cm->fca_number_of_primes], grank, cm->fca_hash_size);
                rc = comm->c_coll.coll_allreduce(&part_of_hash_index, &hash_index, 1, MPI_INT, MPI_SUM, comm, comm->c_coll.coll_allreduce_module);
                if (rc != OMPI_SUCCESS) {
                    FCA_ERROR("Failed to reduce hash_index: %d", rc);
                    return rc;
                }

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&par_end, NULL);
                    mca_coll_fca_component.fca_work_time_parallel =+
                        par_end.tv_sec - par_start.tv_sec + 1e-6 * (par_end.tv_usec - par_start.tv_usec);
                }
            }else{

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&seq_start, NULL);
                }

                hash_index = 0;
                int q_counter = 0;
                int q_comm_size = ompi_comm_size (comm);

                for(q_counter = 0; q_counter < q_comm_size; q_counter++)
                {
                    hash_index += modular_pow(cm->fca_primes[q_counter % cm->fca_number_of_primes], q_counter, cm->fca_hash_size);
                }

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&seq_end, NULL);
                    mca_coll_fca_component.fca_work_time_sequency =+
                        seq_end.tv_sec - seq_start.tv_sec + 1e-6 * (seq_end.tv_usec - seq_start.tv_usec);
                }

            }

            if(cm->fca_hash[hash_index % cm->fca_hash_size] != NULL)
            {
                c_cache = cm->fca_hash[hash_index % cm->fca_hash_size];
                if(mca_coll_fca_component.fca_verbose == 10) {
                    gettimeofday(&end, NULL);
                    mca_coll_fca_component.fca_total_work_time =+
                        end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);
                    mca_coll_fca_component.fca_hash_hit += 1;
                }
            }else
            {
                if(mca_coll_fca_component.fca_verbose == 10) {
                    mca_coll_fca_component.fca_hash_miss += 1;
                }
                c_cache = OBJ_NEW(opal_list_t);
                cm->fca_hash[hash_index % cm->fca_hash_size] = c_cache;
            }

        }

        act_deep = 0;
        for( c_item = (mca_coll_fca_c_cache_item_t *)opal_list_get_first(c_cache);
                c_item != (mca_coll_fca_c_cache_item_t *)opal_list_get_end(c_cache);
                c_item = (mca_coll_fca_c_cache_item_t *)opal_list_get_next((opal_list_item_t *) c_item)){
            act_deep++;
            /* first check the size */
            if( c_item && (comm_size == c_item->size)) {
                /* then we have a potential cache hit */
                ompi_comm_compare(comm, c_item->comm, &result);
                if( MPI_CONGRUENT == result) {
                    /* cache hit! Return the context and be done with it */
                    /* first bump the score */
                    ret = fca_comm_get_caps(c_item->fca_comm_wrap->fca_comm, &fca_module->fca_comm_caps);
                    if (ret < 0) {
                        FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(ret));
                        return OMPI_ERROR;
                    }
                    fca_module->fca_comm = c_item->fca_comm_wrap->fca_comm;

                    if(mca_coll_fca_component.fca_verbose == 10) {
                        gettimeofday(&end, NULL);

                        mca_coll_fca_component.fca_total_work_time =+
                            end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);

                        mca_coll_fca_component.fca_cache_hit += 1;

                        if(act_deep>mca_coll_fca_component.fca_max_deep_in_cache)
                            mca_coll_fca_component.fca_max_deep_in_cache = act_deep;
                    }
                    return OMPI_SUCCESS;
                }
            }
        }
    }
    rc = __fca_comm_new(fca_module);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }
#if OMPI_FCA_VERSION < 30
    /* allocate comm_init_spec */
    FCA_MODULE_VERBOSE(fca_module, 1, "Starting COMM_INIT comm_id %d proc_idx %d num_procs %d",
                       fca_module->fca_comm_desc.comm_id, fca_module->local_proc_idx,
                       fca_module->num_local_procs);

    ret = mca_coll_fca_comm_init(mca_coll_fca_component.fca_context,
                                 fca_module->rank, ompi_comm_size(fca_module->comm),
                                 fca_module->local_proc_idx, fca_module->num_local_procs,
                                 &fca_module->fca_comm_desc, &fca_module->fca_comm);
    if (ret < 0) {
        FCA_ERROR("COMM_INIT failed: %s", fca_strerror(ret));
        return OMPI_ERROR;
     }
#endif
    /* get communicator capabilities */
    ret = fca_comm_get_caps(fca_module->fca_comm, &fca_module->fca_comm_caps);
    if (ret < 0) {
        FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(ret));
        return OMPI_ERROR;
    }

    /* by this point every rank in the communicator is set up */
    FCA_MODULE_VERBOSE(fca_module, 1, "Initialized FCA communicator, comm_id %d",
            fca_module->fca_comm_desc.comm_id);
    if(mca_coll_fca_component.fca_enable_cache) {

        c_item_new = OBJ_NEW(mca_coll_fca_c_cache_item_t);
        c_item_new->fca_comm_wrap = OBJ_NEW(mca_coll_fca_comm_wrap_t);

        OBJ_RETAIN(comm);

        c_item_new->size = comm_size;
        c_item_new->comm = comm;
        c_item_new->fca_comm_wrap->fca_comm = fca_module->fca_comm;
        c_item_new->fca_comm_wrap->rank = fca_module->rank;
        c_item_new->fca_comm_wrap->comm_id = fca_module->fca_comm_desc.comm_id;

        opal_list_append(c_cache,(opal_list_item_t *) c_item_new);
    }

    if(mca_coll_fca_component.fca_verbose == 10) {

        gettimeofday(&end, NULL);

        mca_coll_fca_component.fca_total_work_time =+
            end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);

        mca_coll_fca_component.fca_cache_miss += 1;
    }
    return OMPI_SUCCESS;
}
Exemplo n.º 27
0
/*
 * This function performs both the get and set operations on the epoch for a
 * sepcific process name. If the epoch passed into the function is
 * ORTE_EPOCH_INVALID, then we are performing a get operation. If the epoch is
 * anything else, we are performing a set operation.
 */
orte_epoch_t get_epoch_from_orte_job_data(orte_process_name_t *proc, orte_epoch_t epoch) {
    int ret, i;
    unsigned int j;
    orte_job_t *jdata;
    orte_proc_t *pdata;

    if (ORTE_JOBID_INVALID == proc->jobid || 
        ORTE_VPID_INVALID  == proc->vpid) {
        return ORTE_EPOCH_INVALID;
    }

    /* Sanity check just to make sure we don't overwrite our existing
     * orte_job_data.
     */
    if (NULL == orte_job_data) {
        orte_job_data = OBJ_NEW(opal_pointer_array_t);
        if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data,
                                        1,
                                        ORTE_GLOBAL_ARRAY_MAX_SIZE,
                                        1))) {
            ORTE_ERROR_LOG(ret);
            return ORTE_EPOCH_INVALID;
        }
    }

    /* Look to see if the job is in the orte_job_data. */
    for (i = 0; i < orte_job_data->size; i++) {
        if (NULL == (jdata = (orte_job_t *) opal_pointer_array_get_item(orte_job_data, i))) {
            continue;
        } else if (proc->jobid == jdata->jobid) {
            /* Found the right job, now look for the process. */
            for (j = 0; j < jdata->num_procs; j++) {
                if (NULL == (pdata = (orte_proc_t *) opal_pointer_array_get_item(jdata->procs, j))) {
                    continue;
                } else if (proc->vpid == pdata->name.vpid) {
                    if (ORTE_EPOCH_INVALID != epoch) {
                        pdata->name.epoch = epoch;
                    }
                    return pdata->name.epoch;
                }
            }

            /* Found the right job but didn't find the process in it. Create the
             * process if necessary.
             */
            if (ORTE_EPOCH_INVALID != epoch) {
                pdata = OBJ_NEW(orte_proc_t);
                pdata->name.jobid = proc->jobid;
                pdata->name.vpid = proc->vpid;
                pdata->name.epoch = epoch;

                pdata->state = ORTE_PROC_STATE_TERMINATED;

                opal_pointer_array_add(jdata->procs, pdata);
                jdata->num_procs++;

                return pdata->name.epoch;
            } else {
                return ORTE_EPOCH_MIN;
            }
        }
    }

    /* Didn't find the right job, add a new job structure and a new process. */
    if (ORTE_EPOCH_INVALID != epoch) {
        jdata = OBJ_NEW(orte_job_t);
        jdata->jobid = proc->jobid;

        pdata = OBJ_NEW(orte_proc_t);
        pdata->name.jobid = proc->jobid;
        pdata->name.vpid = proc->vpid;
        pdata->name.epoch = epoch;

        pdata->state = ORTE_PROC_STATE_TERMINATED;

        opal_pointer_array_add(jdata->procs, pdata);
        jdata->num_procs++;

        opal_pointer_array_add(orte_job_data, jdata);

        return pdata->name.epoch;
    } else {
        return ORTE_EPOCH_MIN;
    }
}
Exemplo n.º 28
0
/*
 * Copy all the attributes from one MPI object to another.  Called
 * when MPI objects are copied (e.g., back-end actions to
 * MPI_COMM_DUP).
 */
int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object,
                       void *new_object, opal_hash_table_t *oldattr_hash,
                       opal_hash_table_t *newattr_hash)
{
    int ret;
    int err;
    uint32_t key;
    int flag;
    void *node, *in_node;
    attribute_value_t *old_attr, *new_attr;
    ompi_attribute_keyval_t *hash_value;

    /* If there's nothing to do, just return */
    if (NULL == oldattr_hash) {
        return MPI_SUCCESS;
    }

    OPAL_THREAD_LOCK(&attribute_lock);

    /* Get the first attribute in the object's hash */
    ret = opal_hash_table_get_first_key_uint32(oldattr_hash, &key,
                                               (void **) &old_attr,
                                               &node);

    /* While we still have some attribute in the object's key hash */
    while (OMPI_SUCCESS == ret) {
        in_node = node;

        /* Get the keyval in the main keyval hash - so that we know
           what the copy_attr_fn is */
        err = opal_hash_table_get_value_uint32(keyval_hash, key,
                                               (void **) &hash_value);
        if (OMPI_SUCCESS != err) {
            /* This should not happen! */
            ret = MPI_ERR_INTERN;
            goto out;
        }

        err = 0;
        new_attr = OBJ_NEW(attribute_value_t);
        switch (type) {
        case COMM_ATTR:
            /* Now call the copy_attr_fn */
            COPY_ATTR_CALLBACKS(communicator, old_object, hash_value,
                                old_attr, new_object, new_attr, err);
            break;

        case TYPE_ATTR:
            /* Now call the copy_attr_fn */
            COPY_ATTR_CALLBACKS(datatype, old_object, hash_value,
                                old_attr, new_object, new_attr, err);
            break;

        case WIN_ATTR:
            /* Now call the copy_attr_fn */
            COPY_ATTR_CALLBACKS(win, old_object, hash_value,
                                old_attr, new_object, new_attr, err);
            break;

        default:
            /* This should not happen */
            assert(0);
            break;
        }
        /* Did the callback return non-MPI_SUCCESS? */
        if (0 != err) {
            ret = err;
            goto out;
        }

        /* Hang this off the object's hash */

        /* The COPY_ATTR_CALLBACKS macro will have converted the
           _flag_ callback output value from Fortran's .TRUE. value to
           0/1 (if necessary).  So we only need to check for 0/1 here
           -- not .TRUE. */
        if (1 == flag) {
            if (0 != (hash_value->attr_flag & OMPI_KEYVAL_F77)) {
                if (0 != (hash_value->attr_flag & OMPI_KEYVAL_F77_INT)) {
                    new_attr->av_set_from = OMPI_ATTRIBUTE_FINT;
                } else {
                    new_attr->av_set_from = OMPI_ATTRIBUTE_AINT;
                }
            } else {
                new_attr->av_set_from = OMPI_ATTRIBUTE_C;
            }
            ret = set_value(type, new_object, &newattr_hash, key,
                            new_attr, true);
            if (MPI_SUCCESS != ret) {
                goto out;
            }
        } else {
            OBJ_RELEASE(new_attr);
        }

        ret = opal_hash_table_get_next_key_uint32(oldattr_hash, &key,
                                                  (void **) &old_attr,
                                                  in_node, &node);
    }
    ret = MPI_SUCCESS;

 out:
    /* All done */
    opal_atomic_wmb();
    OPAL_THREAD_UNLOCK(&attribute_lock);
    return ret;
}
Exemplo n.º 29
0
int orte_util_decode_nodemap(opal_byte_object_t *bo)
{
    int n;
    int32_t num_nodes, i, num_daemons;
    orte_nid_t *node;
    orte_vpid_t *vpids;
    orte_nid_t *nd, *ndptr;
    opal_buffer_t buf;
    int rc;
    uint8_t *oversub;

    OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                         "%s decode:nidmap decoding nodemap",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if there are any entries already in the node array, clear it out */
    if (0 < orte_nidmap.size) {
        /* unfortunately, the opal function "remove_all" doesn't release
         * the memory pointed to by the elements in the array, so we need
         * to release those first
         */
        for (i=0; i < orte_nidmap.size; i++) {
            if (NULL != (ndptr = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) {
                OBJ_RELEASE(ndptr);
            }
        }
        /* now use the opal function to reset the internal pointers */
        opal_pointer_array_remove_all(&orte_nidmap);
    }
    
    /* xfer the byte object to a buffer for unpacking */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    opal_dss.load(&buf, bo->bytes, bo->size);
    
    /* unpack number of nodes */
    n=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &num_nodes, &n, OPAL_INT32))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
 
    OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                         "%s decode:nidmap decoding %d nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_nodes));
    
    /* set the size of the nidmap storage so we minimize realloc's */
    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(&orte_nidmap, num_nodes))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    
    /* loop over nodes and unpack the raw nodename */
    for (i=0; i < num_nodes; i++) {
        node = OBJ_NEW(orte_nid_t);
        /* the arch defaults to our arch so that non-hetero
         * case will yield correct behavior
         */
        opal_pointer_array_set_item(&orte_nidmap, i, node);
        
        /* unpack the node's name */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &(node->name), &n, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }
    
    /* unpack the daemon vpids */
    vpids = (orte_vpid_t*)malloc(num_nodes * sizeof(orte_vpid_t));
    n=num_nodes;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, vpids, &n, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* unpack the oversubscribed flags */
    oversub = (uint8_t*)malloc(num_nodes * sizeof(uint8_t));
    n=num_nodes;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, oversub, &n, OPAL_UINT8))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* transfer the data to the nidmap, counting the number of
     * daemons in the system
     */
    num_daemons = 0;
    for (i=0; i < num_nodes; i++) {
        if (NULL != (ndptr = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) {
            ndptr->daemon = vpids[i];
            if (0 == oversub[i]) {
                ndptr->oversubscribed = false;
            } else {
                ndptr->oversubscribed = true;
            }
            if (ORTE_VPID_INVALID != vpids[i]) {
                ++num_daemons;
            }
        }
    }
    free(vpids);
    free(oversub);

    /* if we are a daemon or the HNP, update our num_procs */
    if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
        orte_process_info.num_procs = num_daemons;

        if (orte_process_info.max_procs < orte_process_info.num_procs) {
            orte_process_info.max_procs = orte_process_info.num_procs;
        }
    }
    /* update num_daemons */
    orte_process_info.num_daemons = num_daemons;
    
    if (0 < opal_output_get_verbosity(orte_debug_output)) {
        for (i=0; i < num_nodes; i++) {
            if (NULL == (nd = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) {
                continue;
            }
            opal_output(0, "%s node[%d].name %s daemon %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i,
                        (NULL == nd->name) ? "NULL" : nd->name,
                        ORTE_VPID_PRINT(nd->daemon));
        }
    }

    OBJ_DESTRUCT(&buf);
    return ORTE_SUCCESS;
}
/*
 * Function to find as many components of a given type as possible.  This
 * includes statically-linked in components as well as opening up a
 * directory and looking for shared-library MCA components of the
 * appropriate type (load them if available).
 *
 * Return one consolidated array of (mca_base_component_t*) pointing to all
 * available components.
 */
int mca_base_component_find(const char *directory, const char *type, 
                            const mca_base_component_t *static_components[], 
                            char **requested_component_names,
                            bool include_mode,
                            opal_list_t *found_components,
                            bool open_dso_components)
{
    int i;
    opal_list_item_t *item;
    mca_base_component_list_item_t *cli;

    /* Find all the components that were statically linked in */
    OBJ_CONSTRUCT(found_components, opal_list_t);
    for (i = 0; NULL != static_components[i]; ++i) {
        if ( use_component(include_mode,
                           (const char**)requested_component_names,
                           static_components[i]->mca_component_name) ) {
            cli = OBJ_NEW(mca_base_component_list_item_t);
            if (NULL == cli) {
                return OPAL_ERR_OUT_OF_RESOURCE;
            }
            cli->cli_component = static_components[i];
            opal_list_append(found_components, (opal_list_item_t *) cli);
        }
    }

#if OMPI_WANT_LIBLTDL
    /* Find any available dynamic components in the specified directory */
    if (open_dso_components) {
        int param, param_disable_dlopen;
        param = mca_base_param_find("mca", NULL, "component_disable_dlopen");
        mca_base_param_lookup_int(param, &param_disable_dlopen);

        if (0 == param_disable_dlopen) {
            find_dyn_components(directory, type,
                                (const char**)requested_component_names,
                                include_mode, found_components); 
        }
    } else {
        opal_output_verbose(40, 0, 
                            "mca: base: component_find: dso loading for %s MCA components disabled", 
                            type);
    }
#endif

    /* Ensure that *all* requested components exist.  Print a warning
       and abort if they do not. */
    for (i = 0; include_mode && NULL != requested_component_names && 
             NULL != requested_component_names[i]; ++i) {
        for (item = opal_list_get_first(found_components);
             opal_list_get_end(found_components) != item; 
             item = opal_list_get_next(item)) {
            cli = (mca_base_component_list_item_t*) item;
            if (0 == strcmp(requested_component_names[i], 
                            cli->cli_component->mca_component_name)) {
                break;
            }
        }

        if (opal_list_get_end(found_components) == item) {
            char h[MAXHOSTNAMELEN];
            gethostname(h, sizeof(h));
            opal_show_help("help-mca-base.txt", 
                           "find-available:not-valid", true,
                           h, type, requested_component_names[i]);
            return OPAL_ERR_NOT_FOUND;
        }
    }

    /* All done */

    return OPAL_SUCCESS;
}