int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *comm, ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, bool send_first, int mode, ompi_request_t **req) { ompi_comm_cid_context_t *context; ompi_comm_request_t *request; ompi_request_t *subreq; int ret = 0; context = mca_comm_cid_context_alloc (*newcomm, comm, bridgecomm, arg0, arg1, "activate", send_first, mode); if (NULL == context) { return OMPI_ERR_OUT_OF_RESOURCE; } /* keep track of the pointer so it can be set to MPI_COMM_NULL on failure */ context->newcommp = newcomm; request = ompi_comm_request_get (); if (NULL == request) { OBJ_RELEASE(context); return OMPI_ERR_OUT_OF_RESOURCE; } request->context = &context->super; if (MPI_UNDEFINED != (*newcomm)->c_local_group->grp_my_rank) { /* Initialize the PML stuff in the newcomm */ if ( OMPI_SUCCESS != (ret = MCA_PML_CALL(add_comm(*newcomm))) ) { OBJ_RELEASE(newcomm); OBJ_RELEASE(context); *newcomm = MPI_COMM_NULL; return ret; } OMPI_COMM_SET_PML_ADDED(*newcomm); } /* Step 1: the barrier, after which it is allowed to * send messages over the new communicator */ ret = context->allreduce_fn (&context->ok, &context->ok, 1, MPI_MIN, context, &subreq); if (OMPI_SUCCESS != ret) { ompi_comm_request_return (request); return ret; } ompi_comm_request_schedule_append (request, ompi_comm_activate_nb_complete, &subreq, 1); ompi_comm_request_start (request); *req = &request->super; return OMPI_SUCCESS; }
/* This routine serves two purposes: * - the allreduce acts as a kind of Barrier, * which avoids, that we have incoming fragments * on the new communicator before everybody has set * up the comm structure. * - some components (e.g. the collective MagPIe component * might want to generate new communicators and communicate * using the new comm. Thus, it can just be called after * the 'barrier'. * * The reason that this routine is in comm_cid and not in * comm.c is, that this file contains the allreduce implementations * which are required, and thus we avoid having duplicate code... */ int ompi_comm_activate ( ompi_communicator_t** newcomm, ompi_communicator_t* comm, ompi_communicator_t* bridgecomm, void* local_leader, void* remote_leader, int mode, int send_first ) { int ret = 0; int ok=0, gok=0; ompi_comm_cid_allredfct* allredfnct; /* Step 1: the barrier, after which it is allowed to * send messages over the new communicator */ switch (mode) { case OMPI_COMM_CID_INTRA: allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra; break; case OMPI_COMM_CID_INTER: allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_inter; break; case OMPI_COMM_CID_INTRA_BRIDGE: allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_bridge; break; case OMPI_COMM_CID_INTRA_OOB: allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_oob; break; default: return MPI_UNDEFINED; break; } if (MPI_UNDEFINED != (*newcomm)->c_local_group->grp_my_rank) { /* Initialize the PML stuff in the newcomm */ if ( OMPI_SUCCESS != (ret = MCA_PML_CALL(add_comm(*newcomm))) ) { goto bail_on_error; } OMPI_COMM_SET_PML_ADDED(*newcomm); } (allredfnct)(&ok, &gok, 1, MPI_MIN, comm, bridgecomm, local_leader, remote_leader, send_first ); /** * Check to see if this process is in the new communicator. * * Specifically, this function is invoked by all proceses in the * old communicator, regardless of whether they are in the new * communicator or not. This is because it is far simpler to use * MPI collective functions on the old communicator to determine * some data for the new communicator (e.g., remote_leader) than * to kludge up our own pseudo-collective routines over just the * processes in the new communicator. Hence, *all* processes in * the old communicator need to invoke this function. * * That being said, only processes in the new communicator need to * select a coll module for the new communicator. More * specifically, proceses who are not in the new communicator * should *not* select a coll module -- for example, * ompi_comm_rank(newcomm) returns MPI_UNDEFINED for processes who * are not in the new communicator. This can cause errors in the * selection / initialization of a coll module. Plus, it's * wasteful -- processes in the new communicator will end up * freeing the new communicator anyway, so we might as well leave * the coll selection as NULL (the coll base comm unselect code * handles that case properly). */ if (MPI_UNDEFINED == (*newcomm)->c_local_group->grp_my_rank) { return OMPI_SUCCESS; } /* Let the collectives components fight over who will do collective on this new comm. */ if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(*newcomm))) { goto bail_on_error; } /* For an inter communicator, we have to deal with the potential * problem of what is happening if the local_comm that we created * has a lower CID than the parent comm. This is not a problem * as long as the user calls MPI_Comm_free on the inter communicator. * However, if the communicators are not freed by the user but released * by Open MPI in MPI_Finalize, we walk through the list of still available * communicators and free them one by one. Thus, local_comm is freed before * the actual inter-communicator. However, the local_comm pointer in the * inter communicator will still contain the 'previous' address of the local_comm * and thus this will lead to a segmentation violation. In order to prevent * that from happening, we increase the reference counter local_comm * by one if its CID is lower than the parent. We cannot increase however * its reference counter if the CID of local_comm is larger than * the CID of the inter communicators, since a regular MPI_Comm_free would * leave in that the case the local_comm hanging around and thus we would not * recycle CID's properly, which was the reason and the cause for this trouble. */ if ( OMPI_COMM_IS_INTER(*newcomm)) { if ( OMPI_COMM_CID_IS_LOWER(*newcomm, comm)) { OMPI_COMM_SET_EXTRA_RETAIN (*newcomm); OBJ_RETAIN (*newcomm); } } return OMPI_SUCCESS; bail_on_error: OBJ_RELEASE(*newcomm); *newcomm = MPI_COMM_NULL; return ret; }
/* * Initialize comm world/self/null/parent. */ int ompi_comm_init(void) { ompi_group_t *group; size_t size; /* Setup communicator array */ OBJ_CONSTRUCT(&ompi_mpi_communicators, opal_pointer_array_t); if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_mpi_communicators, 0, OMPI_FORTRAN_HANDLE_MAX, 64) ) { return OMPI_ERROR; } /* Setup MPI_COMM_WORLD */ OBJ_CONSTRUCT(&ompi_mpi_comm_world, ompi_communicator_t); group = OBJ_NEW(ompi_group_t); group->grp_proc_pointers = ompi_proc_world(&size); group->grp_proc_count = (int)size; OMPI_GROUP_SET_INTRINSIC (group); OMPI_GROUP_SET_DENSE (group); ompi_set_group_rank(group, ompi_proc_local()); ompi_group_increment_proc_count (group); ompi_mpi_comm_world.comm.c_contextid = 0; ompi_mpi_comm_world.comm.c_id_start_index = 4; ompi_mpi_comm_world.comm.c_id_available = 4; ompi_mpi_comm_world.comm.c_f_to_c_index = 0; ompi_mpi_comm_world.comm.c_my_rank = group->grp_my_rank; ompi_mpi_comm_world.comm.c_local_group = group; ompi_mpi_comm_world.comm.c_remote_group = group; OBJ_RETAIN(ompi_mpi_comm_world.comm.c_remote_group); ompi_mpi_comm_world.comm.c_cube_dim = opal_cube_dim((int)size); ompi_mpi_comm_world.comm.error_handler = &ompi_mpi_errors_are_fatal.eh; OBJ_RETAIN( &ompi_mpi_errors_are_fatal.eh ); OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_world.comm); opal_pointer_array_set_item (&ompi_mpi_communicators, 0, &ompi_mpi_comm_world); MEMCHECKER (memset (ompi_mpi_comm_world.comm.c_name, 0, MPI_MAX_OBJECT_NAME)); strncpy (ompi_mpi_comm_world.comm.c_name, "MPI_COMM_WORLD", strlen("MPI_COMM_WORLD")+1 ); ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_NAMEISSET; ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_INTRINSIC; /* We have to create a hash (although it is legal to leave this filed NULL -- the attribute accessor functions will intepret this as "there are no attributes cached on this object") because MPI_COMM_WORLD has some predefined attributes. */ ompi_attr_hash_init(&ompi_mpi_comm_world.comm.c_keyhash); /* Setup MPI_COMM_SELF */ OBJ_CONSTRUCT(&ompi_mpi_comm_self, ompi_communicator_t); group = OBJ_NEW(ompi_group_t); group->grp_proc_pointers = ompi_proc_self(&size); group->grp_my_rank = 0; group->grp_proc_count = (int)size; OMPI_GROUP_SET_INTRINSIC (group); OMPI_GROUP_SET_DENSE (group); ompi_mpi_comm_self.comm.c_contextid = 1; ompi_mpi_comm_self.comm.c_f_to_c_index = 1; ompi_mpi_comm_self.comm.c_id_start_index = 20; ompi_mpi_comm_self.comm.c_id_available = 20; ompi_mpi_comm_self.comm.c_my_rank = group->grp_my_rank; ompi_mpi_comm_self.comm.c_local_group = group; ompi_mpi_comm_self.comm.c_remote_group = group; OBJ_RETAIN(ompi_mpi_comm_self.comm.c_remote_group); ompi_mpi_comm_self.comm.error_handler = &ompi_mpi_errors_are_fatal.eh; OBJ_RETAIN( &ompi_mpi_errors_are_fatal.eh ); OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_self.comm); opal_pointer_array_set_item (&ompi_mpi_communicators, 1, &ompi_mpi_comm_self); MEMCHECKER (memset (ompi_mpi_comm_self.comm.c_name, 0, MPI_MAX_OBJECT_NAME)); strncpy(ompi_mpi_comm_self.comm.c_name,"MPI_COMM_SELF",strlen("MPI_COMM_SELF")+1); ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_NAMEISSET; ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_INTRINSIC; /* We can set MPI_COMM_SELF's keyhash to NULL because it has no predefined attributes. If a user defines an attribute on MPI_COMM_SELF, the keyhash will automatically be created. */ ompi_mpi_comm_self.comm.c_keyhash = NULL; /* Setup MPI_COMM_NULL */ OBJ_CONSTRUCT(&ompi_mpi_comm_null, ompi_communicator_t); ompi_mpi_comm_null.comm.c_local_group = &ompi_mpi_group_null.group; ompi_mpi_comm_null.comm.c_remote_group = &ompi_mpi_group_null.group; OBJ_RETAIN(&ompi_mpi_group_null.group); OBJ_RETAIN(&ompi_mpi_group_null.group); ompi_mpi_comm_null.comm.c_contextid = 2; ompi_mpi_comm_null.comm.c_f_to_c_index = 2; ompi_mpi_comm_null.comm.c_my_rank = MPI_PROC_NULL; ompi_mpi_comm_null.comm.error_handler = &ompi_mpi_errors_are_fatal.eh; OBJ_RETAIN( &ompi_mpi_errors_are_fatal.eh ); opal_pointer_array_set_item (&ompi_mpi_communicators, 2, &ompi_mpi_comm_null); MEMCHECKER (memset (ompi_mpi_comm_null.comm.c_name, 0, MPI_MAX_OBJECT_NAME)); strncpy(ompi_mpi_comm_null.comm.c_name,"MPI_COMM_NULL",strlen("MPI_COMM_NULL")+1); ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_NAMEISSET; ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_INTRINSIC; /* Initialize the parent communicator to MPI_COMM_NULL */ ompi_mpi_comm_parent = &ompi_mpi_comm_null.comm; OBJ_RETAIN(&ompi_mpi_comm_null); OBJ_RETAIN(&ompi_mpi_group_null.group); OBJ_RETAIN(&ompi_mpi_errors_are_fatal.eh); /* initialize the comm_reg stuff for multi-threaded comm_cid allocation */ ompi_comm_reg_init(); return OMPI_SUCCESS; }