/*
 * Bring up the FCA communicator attached to fca_module.
 *
 * Steps, in order:
 *   1. _fca_comm_new()            - obtain the communicator descriptor;
 *   2. mca_scoll_fca_comm_init()  - initialize the communicator handle;
 *   3. fca_comm_get_caps()        - query the communicator capabilities.
 *
 * Returns the code from _fca_comm_new() if that step fails, OSHMEM_ERROR if
 * either FCA call reports a negative status, and OSHMEM_SUCCESS otherwise.
 */
static int _create_fca_comm(mca_scoll_fca_module_t *fca_module)
{
    int status;
    int fca_rc;

    /* Step 1: descriptor exchange. */
    status = _fca_comm_new(fca_module);
    if (OSHMEM_SUCCESS != status) {
        return status;
    }

    FCA_MODULE_VERBOSE(fca_module,
                       1,
                       "Starting COMM_INIT comm_id %d proc_idx %d num_procs %d",
                       fca_module->fca_comm_desc.comm_id,
                       fca_module->local_proc_idx,
                       fca_module->num_local_procs);

    /* Step 2: communicator initialization, sized by the group's proc count. */
    fca_rc = mca_scoll_fca_comm_init(mca_scoll_fca_component.fca_context,
                                     oshmem_proc_group_find_id(fca_module->comm,
                                                               fca_module->rank),
                                     fca_module->comm->proc_count,
                                     fca_module->local_proc_idx,
                                     fca_module->num_local_procs,
                                     &fca_module->fca_comm_desc,
                                     &fca_module->fca_comm);
    if (fca_rc < 0) {
        FCA_ERROR("COMM_INIT failed: %s", fca_strerror(fca_rc));
        return OSHMEM_ERROR;
    }

    /* Step 3: cache the communicator capabilities in the module. */
    fca_rc = fca_comm_get_caps(fca_module->fca_comm, &fca_module->fca_comm_caps);
    if (fca_rc < 0) {
        FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(fca_rc));
        return OSHMEM_ERROR;
    }

    FCA_MODULE_VERBOSE(fca_module,
                       1,
                       "Initialized FCA communicator, comm_id %d",
                       fca_module->fca_comm_desc.comm_id);

    return OSHMEM_SUCCESS;
}
/*
 * Collectively obtain a new FCA communicator descriptor for the group held in
 * fca_module->comm and store it in fca_module->fca_comm_desc.
 *
 * Protocol as implemented below:
 *   1. Node managers (local_proc_idx == 0) fetch their rank info from FCA.
 *   2. Every PE puts its rank-info size into rcounts[my_id] on the root PE;
 *      the root waits until every slot differs from the -1 sentinel.
 *   3. The root gathers the rank-info blobs with get(), calls fca_comm_new()
 *      and stores the result in *mca_scoll_fca_component.ret.
 *   4. Non-root PEs fetch that result and, on success, the communicator
 *      descriptor from the root.
 *
 * Returns OSHMEM_SUCCESS on success and OSHMEM_ERROR on failure.
 */
static int _fca_comm_new(mca_scoll_fca_module_t *fca_module)
{
    struct oshmem_group_t *comm = fca_module->comm;
    fca_comm_new_spec_t spec;
    int info_size = 0, all_info_size = 0;
    void *all_info = NULL, *my_info = NULL;
    int *disps = NULL;
    int i;
    /* The first member of the group is the root ("rank0") of the exchange.
     * This declaration was missing although root_id is used just below. */
    const int root_id = 0;
    const int root_pe = oshmem_proc_pe(comm->proc_array[root_id]);
    const int my_id = oshmem_proc_group_find_id(comm, comm->my_pe);
    const int is_root = (root_pe == comm->my_pe);
    /* Cleared on the root when the gather buffers cannot be allocated. */
    int gather_ok = 1;

    /* call fca_get_rank_info() on node managers only */
    if (fca_module->local_proc_idx == 0) {
        my_info = fca_get_rank_info(mca_scoll_fca_component.fca_context,
                                    &info_size);
        if (!my_info) {
            /* NOTE(review): this return skips the _internal_barrier() calls
             * below; if those are collective, the other PEs of the group may
             * block - confirm and handle at the caller if needed. */
            FCA_ERROR("fca_get_rank_info returned NULL");
            return OSHMEM_ERROR;
        }
    } else {
        info_size = 0;
    }

    FCA_MODULE_VERBOSE(fca_module, 1, "Info size: %d", info_size);

    /* Mark every slot as "not yet received" before sizes are exchanged. */
    for (i = 0; i < comm->proc_count; i++) {
        mca_scoll_fca_component.rcounts[i] = -1;
    }
    _internal_barrier(fca_module);

    /* Publish the local rank-info size into our slot on the root PE. */
    MCA_SPML_CALL(put((void *)&mca_scoll_fca_component.rcounts[my_id],
                      (size_t)sizeof(info_size),
                      (void *)&info_size,
                      root_pe));

    if (is_root) {
        int value = -1;
        for (i = 0; i < comm->proc_count; i++) {
            MCA_SPML_CALL(wait((void *)&mca_scoll_fca_component.rcounts[i],
                               SHMEM_CMP_NE,
                               &value,
                               SHMEM_INT));
        }
    }

    /* Allocate buffer for gathering rank information on rank0 */
    if (is_root) {
        all_info_size = 0;
        disps = calloc(comm->proc_count, sizeof *disps);
        if (disps) {
            for (i = 0; i < comm->proc_count; ++i) {
                disps[i] = all_info_size;
                all_info_size += mca_scoll_fca_component.rcounts[i];
            }
            FCA_MODULE_VERBOSE(fca_module,
                               1,
                               "Total rank_info size: %d",
                               all_info_size);
            /* Zero-filled buffer; request at least one byte so that a NULL
             * result always means allocation failure. */
            all_info = calloc(1, all_info_size > 0 ? (size_t)all_info_size : 1);
        }
        if (!disps || !all_info) {
            /* Do not return here: the other PEs still expect the root to take
             * part in the barriers below. The failure is published through
             * the shared result word instead. */
            FCA_ERROR("Failed to allocate rank_info gather buffers");
            gather_ok = 0;
        }
    }

    /* Expose the local rank info so that the root can fetch it. */
    if (my_info) {
        memcpy(mca_scoll_fca_component.my_info_exchangeable,
               my_info,
               info_size);
    }
    _internal_barrier(fca_module);

    if (is_root && gather_ok) {
        for (i = 0; i < comm->proc_count; i++) {
            if (mca_scoll_fca_component.rcounts[i] > 0) {
                MCA_SPML_CALL(get((void *)mca_scoll_fca_component.my_info_exchangeable,
                                  mca_scoll_fca_component.rcounts[i],
                                  (void *)(((char *)all_info) + disps[i]),
                                  comm->proc_array[i]->super.proc_name.vpid));
            }
        }
    }

    /* Rank0 calls fca_comm_new() and fills the fca_comm_desc field */
    if (is_root) {
        if (gather_ok) {
            spec.rank_info = all_info;
            spec.is_comm_world = comm == oshmem_group_all;
            spec.rank_count = 0;
            for (i = 0; i < comm->proc_count; ++i) {
                FCA_MODULE_VERBOSE(fca_module,
                                   1,
                                   "rcounts[%d]=%d disps[%d]=%d",
                                   i,
                                   mca_scoll_fca_component.rcounts[i],
                                   i,
                                   disps[i]);
                if (mca_scoll_fca_component.rcounts[i] > 0) {
                    ++spec.rank_count;
                }
            }

            FCA_MODULE_VERBOSE(fca_module,
                               1,
                               "starting fca_comm_new(), rank_count: %d",
                               spec.rank_count);

            *mca_scoll_fca_component.ret =
                fca_comm_new(mca_scoll_fca_component.fca_context,
                             &spec,
                             &fca_module->fca_comm_desc);
        } else {
            /* Any negative value makes every PE take the failure path below;
             * the actual cause has already been logged on the root. */
            *mca_scoll_fca_component.ret = -1;
        }

        free(disps);
        free(all_info);
    }

    _internal_barrier(fca_module);

    /* Non-root PEs pull the result code from the root. */
    if (!is_root) {
        MCA_SPML_CALL(get((void *)mca_scoll_fca_component.ret,
                          sizeof(int),
                          (void *)mca_scoll_fca_component.ret,
                          root_pe));
    }

    _internal_barrier(fca_module);

    /* Release the rank_info allocated on node managers. Done before the
     * result check so the buffer is not leaked when COMM_NEW fails. */
    if (fca_module->local_proc_idx == 0) {
        fca_free_rank_info(my_info);
    }

    /* Examine comm_new return value */
    if (*mca_scoll_fca_component.ret < 0) {
        FCA_ERROR("rank %i: COMM_NEW failed: %s",
                  fca_module->rank,
                  fca_strerror(*mca_scoll_fca_component.ret));
        return OSHMEM_ERROR;
    }

    /* Distribute the communicator descriptor from the root to everyone. */
    if (is_root) {
        memcpy(mca_scoll_fca_component.fca_comm_desc_exchangeable,
               &fca_module->fca_comm_desc,
               sizeof(fca_module->fca_comm_desc));
    }

    _internal_barrier(fca_module);
    if (!is_root) {
        MCA_SPML_CALL(get((void *)mca_scoll_fca_component.fca_comm_desc_exchangeable,
                          sizeof(fca_module->fca_comm_desc),
                          (void *)&fca_module->fca_comm_desc,
                          root_pe));
    }

    _internal_barrier(fca_module);

    FCA_MODULE_VERBOSE(fca_module,
                       1,
                       "Received FCA communicator spec, comm_id %d",
                       fca_module->fca_comm_desc.comm_id);
    return OSHMEM_SUCCESS;
}
/* The Binomial Spanning Tree algorithm. Outlay: The game scales with log2(NP) and uses 1 byte of memory. */ static int __algorithm_binomial_tree(struct oshmem_group_t *group, int PE_root, void *target, const void *source, size_t nlong, long *pSync) { int rc = OSHMEM_SUCCESS; long value = SHMEM_SYNC_INIT; int root_id = oshmem_proc_group_find_id(group, PE_root); int my_id = oshmem_proc_group_find_id(group, group->my_pe); int peer_id = 0; int peer_pe = 0; int vrank; int dim = opal_cube_dim(group->proc_count); int hibit; int mask; int i = 0; SCOLL_VERBOSE(12, "[#%d] Broadcast algorithm: Tree", group->my_pe); SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld root = #%d", group->my_pe, pSync[0], PE_root); vrank = (my_id + group->proc_count - root_id) % group->proc_count; hibit = opal_hibit(vrank, dim); SCOLL_VERBOSE(15, "[#%d] dim = %d vrank = %d hibit = %d", group->my_pe, dim, vrank, hibit); dim--; pSync[0] = SHMEM_SYNC_READY; /* Receive data from parent in the tree. */ if (vrank > 0) { value = SHMEM_SYNC_READY; SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe); rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_NE, (void*)&value, SHMEM_LONG)); while ((value = pSync[0]) < 0) { SCOLL_VERBOSE(14, "[#%d] Broadcast size is a negative value (%li)\n", group->my_pe, pSync[0]); MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_NE, (void*)&value, SHMEM_LONG)); } if (OSHMEM_SUCCESS != rc) { return rc; } nlong = (size_t) pSync[0]; } /* Send data to the children. 
*/ for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) { peer_id = vrank | mask; if (peer_id < group->proc_count) { /* Wait for the child to be ready to receive (pSync must have the initial value) */ peer_id = (peer_id + root_id) % group->proc_count; peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); SCOLL_VERBOSE(14, "[#%d] check remote pe is ready to receive #%d", group->my_pe, peer_pe); do { rc = MCA_SPML_CALL(get((void*)pSync, sizeof(long), (void*)pSync, peer_pe)); } while ((OSHMEM_SUCCESS == rc) && (pSync[0] != SHMEM_SYNC_READY)); SCOLL_VERBOSE(14, "[#%d] send data to #%d", group->my_pe, peer_pe); rc = MCA_SPML_CALL(put(target, nlong, (my_id == root_id ? (void *)source : target), peer_pe)); MCA_SPML_CALL(fence()); SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe); value = nlong; rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); if (OSHMEM_SUCCESS != rc) { break; } } } return rc; }