static int hcoll_open(void) { mca_coll_hcoll_component_t *cm; cm = &mca_coll_hcoll_component; mca_coll_hcoll_output = opal_output_open(NULL); opal_output_set_verbosity(mca_coll_hcoll_output, cm->hcoll_verbose); hcoll_rte_fns_setup(); cm->libhcoll_initialized = false; /* Register memory hooks */ if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & opal_mem_hooks_support_level())) { setenv("MXM_HCOLL_MEM_ON_DEMAND_MAP", "y", 0); HCOL_VERBOSE(1, "Enabling on-demand memory mapping"); cm->using_mem_hooks = 1; } else { HCOL_VERBOSE(1, "Disabling on-demand memory mapping"); cm->using_mem_hooks = 0; } return OMPI_SUCCESS; }
/*
 * Non-blocking all-to-all (vector variant) through hcoll.
 *
 * Both OMPI datatypes are translated with ompi_dtype_2_hcoll_dtype() in
 * NO_DERIVED mode.  If either translation yields a zero representation, or
 * if hcoll's ialltoallv returns anything other than HCOLL_SUCCESS, the call
 * is forwarded to the previously saved ialltoallv implementation.
 *
 * Returns the result of whichever implementation ended up handling the call.
 */
int mca_coll_hcoll_ialltoallv(const void *sbuf, int *scounts, int *sdisps,
                              struct ompi_datatype_t *sdtype,
                              void *rbuf, int *rcounts, int *rdisps,
                              struct ompi_datatype_t *rdtype,
                              struct ompi_communicator_t *comm,
                              ompi_request_t ** request,
                              mca_coll_base_module_t *module)
{
    mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module;
    dte_data_representation_t stype;
    dte_data_representation_t rtype;
    int rc;

    HCOL_VERBOSE(20,"RUNNING HCOL IALLTOALLV");

    stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED);
    rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED);
    if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(stype) || HCOL_DTE_IS_ZERO(rtype))) {
        HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback ialltoallv;",
                     sdtype->super.name,
                     rdtype->super.name);
        goto fallback;
    }

    rc = hcoll_collectives.coll_ialltoallv((void *)sbuf, (int *)scounts, (int *)sdisps, stype,
                                           rbuf, (int *)rcounts, (int *)rdisps, rtype,
                                           hcoll_module->hcoll_context, (void**)request);
    if (HCOLL_SUCCESS == rc) {
        return rc;
    }
    HCOL_VERBOSE(20,"RUNNING FALLBACK IALLTOALLV");

fallback:
    /* The fallback function must be paired with the module saved for the
     * non-blocking collective (previous_ialltoallv_module), not with the
     * module of the blocking alltoallv, which may belong to a different
     * component. */
    return hcoll_module->previous_ialltoallv(sbuf, scounts, sdisps, sdtype,
                                             rbuf, rcounts, rdisps, rdtype,
                                             comm, request,
                                             hcoll_module->previous_ialltoallv_module);
}
static int hcoll_close(void) { int rc; mca_coll_hcoll_component_t *cm; cm = &mca_coll_hcoll_component; if (false == cm->libhcoll_initialized) { return OMPI_SUCCESS; } if (cm->using_mem_hooks) { opal_mem_hooks_unregister_release(mca_coll_hcoll_mem_release_cb); } #if HCOLL_API >= HCOLL_VERSION(3,2) hcoll_free_init_opts(cm->init_opts); #endif HCOL_VERBOSE(5,"HCOLL FINALIZE"); rc = hcoll_finalize(); opal_progress_unregister(mca_coll_hcoll_progress); if (HCOLL_SUCCESS != rc){ HCOL_VERBOSE(1,"Hcol library finalize failed"); return OMPI_ERROR; } return OMPI_SUCCESS; }
/*
 * Non-blocking broadcast through hcoll.
 *
 * The OMPI datatype is translated with ompi_dtype_2_hcoll_dtype() in
 * TRY_FIND_DERIVED mode.  If the translation yields a zero representation,
 * or if hcoll's ibcast returns anything other than HCOLL_SUCCESS, the call
 * is forwarded to the previously saved ibcast implementation and its result
 * is returned.
 */
int mca_coll_hcoll_ibcast(void *buff, int count,
                          struct ompi_datatype_t *datatype, int root,
                          struct ompi_communicator_t *comm,
                          ompi_request_t ** request,
                          mca_coll_base_module_t *module)
{
    mca_coll_hcoll_module_t *self = (mca_coll_hcoll_module_t *)module;
    dte_data_representation_t hdtype;
    int status;

    HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING BCAST");

    hdtype = ompi_dtype_2_hcoll_dtype(datatype, TRY_FIND_DERIVED);
    if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(hdtype))) {
        /* No hcoll representation for this datatype: let the previous
         * component handle the operation. */
        HCOL_VERBOSE(20,"Ompi_datatype is not supported: %s; calling fallback non-blocking bcast;",datatype->super.name);
        goto fallback;
    }

    status = hcoll_collectives.coll_ibcast(buff, count, hdtype, root,
                                           (void **)request, self->hcoll_context);
    if (HCOLL_SUCCESS == status) {
        return status;
    }
    HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING BCAST");

fallback:
    return self->previous_ibcast(buff, count, datatype, root,
                                 comm, request, self->previous_ibcast_module);
}
/*
 * Non-blocking reduce through hcoll.
 *
 * The call is forwarded to the previously saved ireduce implementation when
 *   - the datatype has no hcoll representation (NO_DERIVED translation
 *     yields a zero representation),
 *   - the operation maps to HCOL_DTE_OP_NULL, or
 *   - hcoll's ireduce returns anything other than HCOLL_SUCCESS.
 *
 * Returns the result of whichever implementation handled the call.
 */
int mca_coll_hcoll_ireduce(const void *sbuf, void *rbuf, int count,
                           struct ompi_datatype_t *dtype,
                           struct ompi_op_t *op, int root,
                           struct ompi_communicator_t *comm,
                           ompi_request_t ** request,
                           mca_coll_base_module_t *module)
{
    mca_coll_hcoll_module_t *self = (mca_coll_hcoll_module_t *)module;
    dte_data_representation_t hdtype;
    hcoll_dte_op_t *hop;
    int status;

    HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING REDUCE");

    hdtype = ompi_dtype_2_hcoll_dtype(dtype, NO_DERIVED);
    if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(hdtype))) {
        /* Datatype cannot be expressed as an hcoll representation. */
        HCOL_VERBOSE(20,"Ompi_datatype is not supported: dtype = %s; calling fallback non-blocking reduce;",
                     dtype->super.name);
        goto fallback;
    }

    hop = ompi_op_2_hcolrte_op(op);
    if (OPAL_UNLIKELY(HCOL_DTE_OP_NULL == hop->id)) {
        /* Reduction operation has no hcoll counterpart. */
        HCOL_VERBOSE(20,"ompi_op_t is not supported: op = %s; calling fallback non-blocking reduce;",
                     op->o_name);
        goto fallback;
    }

    status = hcoll_collectives.coll_ireduce((void *)sbuf, rbuf, count, hdtype, hop, root,
                                            self->hcoll_context, (void **)request);
    if (HCOLL_SUCCESS == status) {
        return status;
    }
    HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING REDUCE");

fallback:
    return self->previous_ireduce(sbuf, rbuf, count, dtype, op, root,
                                  comm, request, self->previous_ireduce_module);
}
/*
 * Component close hook.
 *
 * Finalizes libhcoll, unregisters the progress callback, then destructs and
 * zeroes the component's active_modules list.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR when hcoll_finalize() fails.
 */
static int hcoll_close(void)
{
    int finalize_rc;

    HCOL_VERBOSE(5,"HCOLL FINALIZE");
    finalize_rc = hcoll_finalize();

    opal_progress_unregister(mca_coll_hcoll_progress);

    /* Tear the list down and wipe its storage; both steps run regardless of
     * the finalize result. */
    OBJ_DESTRUCT(&mca_coll_hcoll_component.active_modules);
    memset(&mca_coll_hcoll_component.active_modules, 0,
           sizeof(mca_coll_hcoll_component.active_modules));

    if (HCOLL_SUCCESS == finalize_rc) {
        return OMPI_SUCCESS;
    }

    HCOL_VERBOSE(1,"Hcol library finalize failed");
    return OMPI_ERROR;
}
/* * Invoked when there's a new communicator that has been created. * Look at the communicator and decide which set of functions and * priority we want to return. */ mca_coll_base_module_t * mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority) { mca_coll_base_module_t *module; mca_coll_hcoll_module_t *hcoll_module; static bool libhcoll_initialized = false; *priority = 0; module = NULL; if (!mca_coll_hcoll_component.hcoll_enable){ goto exit; } if (!libhcoll_initialized) { /* libhcoll should be initialized here since current implmentation of mxm bcol in libhcoll needs world_group fully functional during init world_group, i.e. ompi_comm_world, is not ready at hcoll component open call */ opal_progress_register(hcoll_progress_fn); int rc = hcoll_init(); if (HCOLL_SUCCESS != rc){ mca_coll_hcoll_component.hcoll_enable = 0; opal_progress_unregister(hcoll_progress_fn); HCOL_VERBOSE(0,"Hcol library init failed"); return NULL; } libhcoll_initialized = true; } hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t); if (!hcoll_module){ goto exit; } if (ompi_comm_size(comm) < 2 || OMPI_COMM_IS_INTER(comm)){ goto exit; } hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable; hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL; hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL; hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL; hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL; hcoll_module->super.coll_alltoall = /*hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : */ NULL; hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL; hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL; hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? 
mca_coll_hcoll_iallgather : NULL; hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL; *priority = mca_coll_hcoll_component.hcoll_priority; module = &hcoll_module->super; exit: return module; }
/*
 * Initialize module on the communicator
 *
 * Saves the previously selected collective handlers, sets the runtime tag
 * offset, and creates the hcoll context for the communicator.  For every
 * communicator other than MPI_COMM_WORLD the module is additionally recorded
 * in the component's active_modules list and the communicator is retained.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR when saving the handlers, creating the
 * context, or allocating the list item fails.
 */
static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module,
                                        struct ompi_communicator_t *comm)
{
    mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*) module;

    hcoll_module->comm = comm;

    if (OMPI_SUCCESS != __save_coll_handlers(hcoll_module)) {
        HCOL_ERROR("coll_hcol: __save_coll_handlers failed");
        return OMPI_ERROR;
    }

    hcoll_set_runtime_tag_offset(-100, mca_pml.pml_max_tag);

    hcoll_module->hcoll_context = hcoll_create_context((rte_grp_handle_t)comm);
    if (NULL == hcoll_module->hcoll_context) {
        HCOL_VERBOSE(1,"hcoll_create_context returned NULL");
        return OMPI_ERROR;
    }

    if (comm != &ompi_mpi_comm_world.comm) {
        mca_coll_hcoll_module_list_item_wrapper_t *mw =
            OBJ_NEW(mca_coll_hcoll_module_list_item_wrapper_t);

        if (NULL == mw) {
            /* Allocation failed: bail out before touching the wrapper or
             * retaining the communicator.
             * NOTE(review): the context created above stays attached to the
             * module; confirm the module destructor releases it. */
            HCOL_ERROR("coll_hcol: failed to allocate active module list item");
            return OMPI_ERROR;
        }

        mw->module = hcoll_module;
        OBJ_RETAIN(hcoll_module->comm);
        opal_list_append(&mca_coll_hcoll_component.active_modules,
                         (opal_list_item_t*)mw);
    }

    return OMPI_SUCCESS;
}
/*
 * Non-blocking barrier through hcoll.
 *
 * When hcoll's ibarrier returns anything other than HCOLL_SUCCESS, the call
 * is forwarded to the previously saved ibarrier implementation and its
 * result is returned instead.
 */
int mca_coll_hcoll_ibarrier(struct ompi_communicator_t *comm,
                            ompi_request_t ** request,
                            mca_coll_base_module_t *module)
{
    mca_coll_hcoll_module_t *self = (mca_coll_hcoll_module_t *)module;
    int status;

    HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING BARRIER");

    status = hcoll_collectives.coll_ibarrier(self->hcoll_context, (void **)request);
    if (HCOLL_SUCCESS == status) {
        return status;
    }

    HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING BARRIER");
    return self->previous_ibarrier(comm, request, self->previous_ibarrier_module);
}
/*
 * Non-blocking allgatherv through hcoll.
 *
 * Send and receive datatypes are translated with ompi_dtype_2_hcoll_dtype()
 * in NO_DERIVED mode.  If either translation yields a zero representation,
 * or if hcoll's iallgatherv returns anything other than HCOLL_SUCCESS, the
 * call is forwarded to the previously saved iallgatherv implementation and
 * its result is returned.
 */
int mca_coll_hcoll_iallgatherv(const void *sbuf, int scount,
                               struct ompi_datatype_t *sdtype,
                               void *rbuf, const int *rcount,
                               const int *displs,
                               struct ompi_datatype_t *rdtype,
                               struct ompi_communicator_t *comm,
                               ompi_request_t ** request,
                               mca_coll_base_module_t *module)
{
    mca_coll_hcoll_module_t *self = (mca_coll_hcoll_module_t *)module;
    dte_data_representation_t send_rep;
    dte_data_representation_t recv_rep;
    int status;

    HCOL_VERBOSE(20,"RUNNING HCOL NON-BLOCKING ALLGATHERV");

    send_rep = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED);
    recv_rep = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED);

    if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(send_rep) || HCOL_DTE_IS_ZERO(recv_rep))) {
        /* At least one datatype has no hcoll representation: hand the
         * operation to the previous component. */
        HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback non-blocking allgatherv;",
                     sdtype->super.name,
                     rdtype->super.name);
        goto fallback;
    }

    status = hcoll_collectives.coll_iallgatherv((void *)sbuf, scount, send_rep,
                                                rbuf, rcount, displs, recv_rep,
                                                self->hcoll_context, (void **)request);
    if (HCOLL_SUCCESS == status) {
        return status;
    }
    HCOL_VERBOSE(20,"RUNNING FALLBACK NON-BLOCKING ALLGATHER");

fallback:
    return self->previous_iallgatherv(sbuf, scount, sdtype,
                                      rbuf, rcount, displs, rdtype,
                                      comm, request,
                                      self->previous_iallgatherv_module);
}
/*
 * Blocking barrier through hcoll.
 *
 * Once ompi_mpi_finalize_started is set, hcoll is bypassed and the
 * previously saved barrier is used directly.  Otherwise hcoll's barrier is
 * tried first, and the previous barrier is used only when hcoll returns
 * something other than HCOLL_SUCCESS.
 */
int mca_coll_hcoll_barrier(struct ompi_communicator_t *comm,
                           mca_coll_base_module_t *module)
{
    mca_coll_hcoll_module_t *self = (mca_coll_hcoll_module_t *)module;
    int status;

    HCOL_VERBOSE(20,"RUNNING HCOL BARRIER");

    if (OPAL_UNLIKELY(ompi_mpi_finalize_started)) {
        HCOL_VERBOSE(5, "In finalize, reverting to previous barrier");
    } else {
        status = hcoll_collectives.coll_barrier(self->hcoll_context);
        if (HCOLL_SUCCESS == status) {
            return status;
        }
        HCOL_VERBOSE(20,"RUNNING FALLBACK BARRIER");
    }

    return self->previous_barrier(comm, self->previous_barrier_module);
}
/* * Initial query function that is invoked during MPI_INIT, allowing * this module to indicate what level of thread support it provides. */ int mca_coll_hcoll_init_query(bool enable_progress_threads, bool enable_mpi_threads) { #if HCOLL_API < HCOLL_VERSION(3,2) if (enable_mpi_threads) { HCOL_VERBOSE(1, "MPI_THREAD_MULTIPLE not suppported; skipping hcoll component"); return OMPI_ERROR; } #endif return OMPI_SUCCESS; }
/*
 * Component close hook.
 *
 * Finalizes libhcoll and unregisters the progress callback.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR when hcoll_finalize() fails.
 */
static int hcoll_close(void)
{
    const int finalize_rc = hcoll_finalize();

    /* The progress callback is removed regardless of the finalize result. */
    opal_progress_unregister(hcoll_progress_fn);

    if (HCOLL_SUCCESS == finalize_rc) {
        return OMPI_SUCCESS;
    }

    HCOL_VERBOSE(1,"Hcol library finalize failed");
    return OMPI_ERROR;
}
/*
 * Destructor for mca_coll_hcoll_module_t.
 *
 * - If the module belongs to MPI_COMM_WORLD, the communicator attribute
 *   keyval is freed (a failure is only logged).
 * - If the module owns an hcoll context, every saved fallback module is
 *   released and, unless HAVE_HCOLL_CONTEXT_FREE is defined, the context is
 *   destroyed with hcoll_destroy_context().
 * - In all cases the module is finally reset via
 *   mca_coll_hcoll_module_clear().
 */
static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module)
{
    if (&ompi_mpi_comm_world.comm == hcoll_module->comm) {
        if (OMPI_SUCCESS != ompi_attr_free_keyval(COMM_ATTR, &hcoll_comm_attr_keyval, 0)) {
            HCOL_VERBOSE(1,"hcoll ompi_attr_free_keyval failed");
        }
    }

    /* A NULL context means the fallback collectives/modules were never set
     * up for this module, so there is nothing to release: just clear. */
    if (NULL == hcoll_module->hcoll_context) {
        mca_coll_hcoll_module_clear(hcoll_module);
        return;
    }

    /* Blocking collectives. */
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_barrier_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_bcast_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_allreduce_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_allgather_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_allgatherv_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_gatherv_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_alltoall_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_alltoallv_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_reduce_module);

    /* Non-blocking collectives. */
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_ibarrier_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_ibcast_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_iallreduce_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_iallgather_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_iallgatherv_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_igatherv_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_ialltoall_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_ialltoallv_module);
    OBJ_RELEASE_IF_NOT_NULL(hcoll_module->previous_ireduce_module);

#if !defined(HAVE_HCOLL_CONTEXT_FREE)
    {
        /* Output argument of hcoll_destroy_context(); its value is not
         * inspected here. */
        int context_destroyed = 0;

        hcoll_destroy_context(hcoll_module->hcoll_context,
                              (rte_grp_handle_t)hcoll_module->comm,
                              &context_destroyed);
    }
#endif

    mca_coll_hcoll_module_clear(hcoll_module);
}
/*
 * Blocking allgather through hcoll.
 *
 * Send and receive datatypes are translated with ompi_dtype_2_hcoll_dtype()
 * in TRY_FIND_DERIVED mode; for MPI_IN_PLACE the receive representation is
 * reused as the send representation.  If either representation is zero, or
 * if hcoll's allgather returns anything other than HCOLL_SUCCESS, the call
 * is forwarded to the previously saved allgather implementation and its
 * result is returned.
 */
int mca_coll_hcoll_allgather(const void *sbuf, int scount,
                             struct ompi_datatype_t *sdtype,
                             void *rbuf, int rcount,
                             struct ompi_datatype_t *rdtype,
                             struct ompi_communicator_t *comm,
                             mca_coll_base_module_t *module)
{
    mca_coll_hcoll_module_t *self = (mca_coll_hcoll_module_t *)module;
    dte_data_representation_t send_rep;
    dte_data_representation_t recv_rep;
    int status;

    HCOL_VERBOSE(20,"RUNNING HCOL ALLGATHER");

    send_rep = ompi_dtype_2_hcoll_dtype(sdtype, TRY_FIND_DERIVED);
    recv_rep = ompi_dtype_2_hcoll_dtype(rdtype, TRY_FIND_DERIVED);
    if (MPI_IN_PLACE == sbuf) {
        send_rep = recv_rep;
    }

    if (OPAL_UNLIKELY(HCOL_DTE_IS_ZERO(send_rep) || HCOL_DTE_IS_ZERO(recv_rep))) {
        /* At least one datatype has no hcoll representation: hand the
         * operation to the previous component. */
        HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback allgather;",
                     sdtype->super.name,
                     rdtype->super.name);
        goto fallback;
    }

    status = hcoll_collectives.coll_allgather((void *)sbuf, scount, send_rep,
                                              rbuf, rcount, recv_rep,
                                              self->hcoll_context);
    if (HCOLL_SUCCESS == status) {
        return status;
    }
    HCOL_VERBOSE(20,"RUNNING FALLBACK ALLGATHER");

fallback:
    return self->previous_allgather(sbuf, scount, sdtype,
                                    rbuf, rcount, rdtype,
                                    comm, self->previous_allgather_module);
}
/*
 * Initialize module on the communicator
 *
 * Saves the previously selected collective handlers and stores the module
 * pointer as a communicator attribute under hcoll_comm_attr_keyval.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR if either step fails.
 */
static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module,
                                        struct ompi_communicator_t *comm)
{
    mca_coll_hcoll_module_t *self = (mca_coll_hcoll_module_t *)module;

    if (OMPI_SUCCESS != mca_coll_hcoll_save_coll_handlers(self)) {
        HCOL_ERROR("coll_hcol: mca_coll_hcoll_save_coll_handlers failed");
        return OMPI_ERROR;
    }

    if (OMPI_SUCCESS != ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash,
                                        hcoll_comm_attr_keyval, (void *)module,
                                        false)) {
        HCOL_VERBOSE(1,"hcoll ompi_attr_set_c failed");
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
static int hcoll_open(void) { int rc; mca_coll_hcoll_output = opal_output_open(NULL); opal_output_set_verbosity(mca_coll_hcoll_output, mca_coll_hcoll_component.hcoll_verbose); hcoll_rte_fns_setup(); opal_progress_register(hcoll_progress_fn); rc = hcoll_init(); if (HCOLL_SUCCESS != rc){ opal_progress_unregister(hcoll_progress_fn); HCOL_VERBOSE(1,"Hcol library init failed"); return OMPI_ERROR; } return OMPI_SUCCESS; }
/*
 * Non-blocking send callback.
 *
 * data    hcoll data representation describing the payload
 * count   number of elements of `data` to send (inline representation only)
 * buffer  payload pointer (inline representation only)
 * ec_h    destination endpoint; ec_h.rank is used as the PML destination
 * grp_h   group handle; used as the ompi communicator
 * tag     message tag
 * req     [out] receives the PML request and is marked active
 *
 * Inline representations are sent as `packed_size * count / 8` bytes of
 * unsigned char through the PML isend.  Non-inline (general)
 * representations are walked entry by entry and every element is sent as an
 * individual DTE_BYTE message through a nested call.
 *
 * Returns HCOLL_SUCCESS on success and a non-zero value on invalid
 * arguments or when any underlying send fails.
 */
static int send_nb(dte_data_representation_t data,
                   uint32_t count,
                   void *buffer,
                   rte_ec_handle_t ec_h,
                   rte_grp_handle_t grp_h,
                   uint32_t tag,
                   rte_request_handle_t *req)
{
    ompi_communicator_t *comm = (ompi_communicator_t *)grp_h;

    if (! ec_h.handle) {
        fprintf(stderr,"***Error in hcolrte_rml_send_nb: wrong null argument: "
                "ec_h.handle = %p, ec_h.rank = %d\n",(void *)ec_h.handle,ec_h.rank);
        return 1;
    }

    if (HCOL_DTE_IS_INLINE(data)) {
        /* Inline representation: one contiguous byte send. */
        size_t size;
        ompi_request_t *ompi_req;

        if (!buffer && !HCOL_DTE_IS_ZERO(data)) {
            fprintf(stderr, "***Error in hcolrte_rml_send_nb: buffer pointer is NULL"
                    " for non DTE_ZERO INLINE data representation\n");
            return 1;
        }

        /* The division by 8 implies packed_size is expressed in bits. */
        size = (size_t)data.rep.in_line_rep.data_handle.in_line.packed_size*count/8;

        HCOL_VERBOSE(30,"PML_ISEND: dest = %d: buf = %p: size = %u: comm = %p",
                     ec_h.rank, buffer, (unsigned int)size, (void *)comm);
        if (MCA_PML_CALL(isend(buffer,size,&(ompi_mpi_unsigned_char.dt),ec_h.rank,
                               tag,MCA_PML_BASE_SEND_STANDARD,comm,&ompi_req))) {
            return 1;
        }
        req->data = (void *)ompi_req;
        req->status = HCOLRTE_REQUEST_ACTIVE;
    } else {
        /* General representation: the layout carries its own offsets. */
        int i;
        unsigned int j;
        int repeat_count;
        struct dte_struct_t *repeat;

        if (NULL != buffer) {
            /* We have a full data description & buffer pointer simultaneously.
               It is ambiguous. Throw a warning since the user might have made a
               mistake with data reps */
            fprintf(stderr,"Warning: buffer_pointer != NULL for NON-inline data representation: buffer_pointer is ignored.\n");
        }

        /* The result was never consumed; the call itself is kept because its
         * side effects (if any) are not visible from here.
         * NOTE(review): drop the call if it is confirmed to be pure. */
        (void)count_total_dte_repeat_entries(&data);

        repeat = data.rep.general_rep->data_representation.data->repeat;
        repeat_count = data.rep.general_rep->data_representation.data->repeat_count;

        for (i = 0; i < repeat_count; i++) {
            char *repeat_unit = (char *)&repeat[i];

            for (j = 0; j < repeat[i].n_elements; j++) {
                void *buf = (void *)(repeat_unit + repeat[i].elements[j].base_offset);
                uint64_t len = repeat[i].elements[j].packed_size;
                int rc;

                /* NOTE(review): every nested send overwrites *req, so only
                 * the last element's request is visible to the caller. */
                rc = send_nb(DTE_BYTE,len,buf,ec_h,grp_h,tag,req);
                if (HCOLL_SUCCESS != rc) {
                    /* Do not report success after a failed element send. */
                    return rc;
                }
            }
        }
    }

    return HCOLL_SUCCESS;
}
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 *
 * Returns NULL (with *priority left at 0) when the component is disabled,
 * when the communicator is an inter-communicator, has fewer than
 * cm->hcoll_np ranks or fewer than 2 ranks, or when any initialization or
 * allocation step fails.  Otherwise returns the new module and sets
 * *priority to cm->hcoll_priority.
 *
 * The first successful call also performs the one-time libhcoll
 * initialization; cm->libhcoll_initialized is only set to true at the very
 * end, so the failure paths below can tell a first-time call from a later
 * one.
 */
mca_coll_base_module_t *
mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    mca_coll_base_module_t *module;
    mca_coll_hcoll_module_t *hcoll_module;
    ompi_attribute_fn_ptr_union_t del_fn;
    ompi_attribute_fn_ptr_union_t copy_fn;
    mca_coll_hcoll_component_t *cm;
    int err;
    int rc;

    cm = &mca_coll_hcoll_component;
    *priority = 0;
    module = NULL;

    if (!cm->hcoll_enable) {
        return NULL;
    }

    /* Unsuitable communicators are rejected before anything is allocated
     * or initialized. */
    if (OMPI_COMM_IS_INTER(comm) || ompi_comm_size(comm) < cm->hcoll_np
        || ompi_comm_size(comm) < 2) {
        return NULL;
    }

    if (!cm->libhcoll_initialized) {
        /* libhcoll should be initialized here since the current
           implementation of mxm bcol in libhcoll needs world_group fully
           functional during init; world_group, i.e. ompi_comm_world, is not
           ready at hcoll component open call */
        opal_progress_register(mca_coll_hcoll_progress);

        HCOL_VERBOSE(10,"Calling hcoll_init();");
#if HCOLL_API >= HCOLL_VERSION(3,2)
        /* Newer API: tag range and thread support are passed through the
         * init options structure. */
        hcoll_read_init_opts(&cm->init_opts);
        cm->init_opts->base_tag = MCA_COLL_BASE_TAG_HCOLL_BASE;
        cm->init_opts->max_tag = mca_pml.pml_max_tag;
        cm->init_opts->enable_thread_support = ompi_mpi_thread_multiple;

        rc = hcoll_init_with_opts(&cm->init_opts);
#else
        hcoll_set_runtime_tag_offset(MCA_COLL_BASE_TAG_HCOLL_BASE, mca_pml.pml_max_tag);
        rc = hcoll_init();
#endif

        if (HCOLL_SUCCESS != rc) {
            /* Init failed: disable the component for all later queries. */
            cm->hcoll_enable = 0;
            opal_progress_unregister(mca_coll_hcoll_progress);
            HCOL_ERROR("Hcol library init failed");
            return NULL;
        }

        /* Register the memory-release hook only if the component may use
         * hooks AND libhcoll reports that it needs one; otherwise clear the
         * flag.  Both preprocessor branches open the same `if`. */
#if HCOLL_API >= HCOLL_VERSION(3,2)
        if (cm->using_mem_hooks && cm->init_opts->mem_hook_needed) {
#else
        if (cm->using_mem_hooks && hcoll_check_mem_release_cb_needed()) {
#endif
            opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL);
        } else {
            cm->using_mem_hooks = 0;
        }

        /* Keyval used to attach per-communicator state. */
        copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN;
        del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn;
        err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &hcoll_comm_attr_keyval, NULL ,0, NULL);
        if (OMPI_SUCCESS != err) {
            /* NOTE(review): this path (and the ones below) finalizes
             * libhcoll but does not unregister the memory-release hook
             * registered above - confirm whether that is intended. */
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
            HCOL_ERROR("Hcol comm keyval create failed");
            return NULL;
        }

        if (mca_coll_hcoll_component.derived_types_support_enabled) {
            /* Keyval used to attach state to datatypes; only created when
             * derived-type support is enabled. */
            copy_fn.attr_datatype_copy_fn = (MPI_Type_internal_copy_attr_function *) MPI_TYPE_NULL_COPY_FN;
            del_fn.attr_datatype_delete_fn = hcoll_type_attr_del_fn;
            err = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, del_fn, &hcoll_type_attr_keyval, NULL ,0, NULL);
            if (OMPI_SUCCESS != err) {
                cm->hcoll_enable = 0;
                hcoll_finalize();
                opal_progress_unregister(mca_coll_hcoll_progress);
                HCOL_ERROR("Hcol type keyval create failed");
                return NULL;
            }
        }

        /* Free list of mca_coll_hcoll_dtype_t objects. */
        OBJ_CONSTRUCT(&cm->dtypes, opal_free_list_t);
        opal_free_list_init(&cm->dtypes, sizeof(mca_coll_hcoll_dtype_t),
                            8, OBJ_CLASS(mca_coll_hcoll_dtype_t), 0, 0,
                            32, -1, 32, NULL, 0, NULL, NULL, NULL);
    }

    hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);
    if (!hcoll_module) {
        /* libhcoll_initialized is still false only when this very call did
         * the one-time init above; in that case undo it. */
        if (!cm->libhcoll_initialized) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
        }
        return NULL;
    }

    hcoll_module->comm = comm;

    HCOL_VERBOSE(10,"Creating hcoll_context for comm %p, comm_id %d, comm_size %d",
                 (void*)comm,comm->c_contextid,ompi_comm_size(comm));

    hcoll_module->hcoll_context =
        hcoll_create_context((rte_grp_handle_t)comm);

    if (NULL == hcoll_module->hcoll_context) {
        HCOL_VERBOSE(1,"hcoll_create_context returned NULL");
        OBJ_RELEASE(hcoll_module);
        /* Same first-call rollback as for the allocation failure above. */
        if (!cm->libhcoll_initialized) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
        }
        return NULL;
    }

    /* Publish a wrapper only for collectives that libhcoll provides; NULL
     * entries leave the collective unset in this module. */
    hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable;
    hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL;
    hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL;
    hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL;
    hcoll_module->super.coll_allgatherv = hcoll_collectives.coll_allgatherv ? mca_coll_hcoll_allgatherv : NULL;
    hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL;
    hcoll_module->super.coll_alltoall = hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : NULL;
    hcoll_module->super.coll_alltoallv = hcoll_collectives.coll_alltoallv ? mca_coll_hcoll_alltoallv : NULL;
    hcoll_module->super.coll_gatherv = hcoll_collectives.coll_gatherv ? mca_coll_hcoll_gatherv : NULL;
    hcoll_module->super.coll_reduce = hcoll_collectives.coll_reduce ? mca_coll_hcoll_reduce : NULL;
    hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL;
    hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL;
    hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL;
    /* iallgatherv and ireduce wrappers are only used with HCOLL API >= 3.5 */
#if HCOLL_API >= HCOLL_VERSION(3,5)
    hcoll_module->super.coll_iallgatherv = hcoll_collectives.coll_iallgatherv ? mca_coll_hcoll_iallgatherv : NULL;
#else
    hcoll_module->super.coll_iallgatherv = NULL;
#endif
    hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL;
#if HCOLL_API >= HCOLL_VERSION(3,5)
    hcoll_module->super.coll_ireduce = hcoll_collectives.coll_ireduce ? mca_coll_hcoll_ireduce : NULL;
#else
    hcoll_module->super.coll_ireduce = NULL;
#endif
    /* gather and ialltoall are unconditionally left unset. */
    hcoll_module->super.coll_gather = /*hcoll_collectives.coll_gather ? mca_coll_hcoll_gather :*/ NULL;
    hcoll_module->super.coll_igatherv = hcoll_collectives.coll_igatherv ? mca_coll_hcoll_igatherv : NULL;
    hcoll_module->super.coll_ialltoall = /*hcoll_collectives.coll_ialltoall ? mca_coll_hcoll_ialltoall : */ NULL;
    /* ialltoallv wrapper is only used with HCOLL API >= 3.7 */
#if HCOLL_API >= HCOLL_VERSION(3,7)
    hcoll_module->super.coll_ialltoallv = hcoll_collectives.coll_ialltoallv ? mca_coll_hcoll_ialltoallv : NULL;
#else
    hcoll_module->super.coll_ialltoallv = NULL;
#endif

    *priority = cm->hcoll_priority;
    module = &hcoll_module->super;

    /* Everything succeeded: from now on the one-time init is skipped. */
    if (!cm->libhcoll_initialized) {
        cm->libhcoll_initialized = true;
    }

    return module;
}

/* Class definition for mca_coll_hcoll_module_t: derives from
 * mca_coll_base_module_t and uses the construct/destruct functions below. */
OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_t,
                   mca_coll_base_module_t,
                   mca_coll_hcoll_module_construct,
                   mca_coll_hcoll_module_destruct);
/*
 * Initialize module on the communicator
 *
 * Records the communicator in the module, saves the previously selected
 * collective handlers, configures the hcoll runtime (tag offset and the
 * halt flag, which is pointed at ompi_mpi_finalized) and creates the hcoll
 * context for the communicator.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR when saving the handlers or creating
 * the context fails.
 */
static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module,
                                        struct ompi_communicator_t *comm)
{
    mca_coll_hcoll_module_t *self = (mca_coll_hcoll_module_t *)module;

    self->comm = comm;

    if (OMPI_SUCCESS != __save_coll_handlers(self)) {
        HCOL_ERROR("coll_hcol: __save_coll_handlers failed");
        return OMPI_ERROR;
    }

    /* Runtime configuration is done before the context is created. */
    hcoll_set_runtime_tag_offset(-100, mca_pml.pml_max_tag);
    hcoll_set_rte_halt_flag_address(&ompi_mpi_finalized);
    hcoll_set_rte_halt_flag_size(sizeof(ompi_mpi_finalized));

    self->hcoll_context = hcoll_create_context((rte_grp_handle_t)comm);
    if (NULL == self->hcoll_context) {
        HCOL_VERBOSE(1,"hcoll_create_context returned NULL");
        return OMPI_ERROR;
    }

    /* Disabled point-to-point smoke test of the RTE send/recv/test callbacks. */
#if 0
    {
        printf("\033[33mrank %d: DOING EXTRA TEST\033[0m\n",ompi_comm_rank(comm));
        fflush(stdout);
        sleep(1);
        rte_ec_handle_t handle;
        rte_grp_handle_t world_group = hcoll_rte_functions.rte_world_group_fn();
        int peer;
        const int max_count = 10000000;
        const int step = max_count/100;
        int buf = 0;
        int i;
        rte_request_handle_t req;
        peer = (ompi_comm_rank(comm)+1)%2;
        hcoll_rte_functions.get_ec_handles_fn(1,&peer,world_group,&handle);
        for (i=1; i<max_count+1; i++){
            if (0 == ompi_comm_rank(comm)){
                if (i/step*step == i){
                    printf("%d %% done...\n",i/step);fflush(stdout);
                }
                buf = 1;
                hcoll_rte_functions.send_fn(DTE_INT32,1,&buf,handle,world_group,0,&req);
            } else {
                hcoll_rte_functions.recv_fn(DTE_INT32,1,&buf,handle,world_group,0,&req);
            }
            int completed = 0;
            hcoll_rte_functions.test_fn(&req,&completed);
            while(!completed){
                hcoll_rte_functions.test_fn(&req,&completed);
                /*hcoll_rte_functions.rte_progress_fn();*/
                opal_progress();
            }
        }
        printf("\033[32mrank %d: EXTRA TEST PASS\033[0m\n",ompi_comm_rank(comm));
        fflush(stdout);
        sleep(1);
    }
#endif

    return OMPI_SUCCESS;
}