/**
 * Express a datatype as a number of elements of a single predefined type.
 *
 * @param datatype       datatype to inspect
 * @param prim_datatype  [out] type returned by
 *                       ompi_datatype_get_single_predefined_type_from_args();
 *                       not written when that lookup returns NULL
 * @param prim_count     [out] size of datatype divided by the size of the
 *                       predefined type (narrowed to 32 bits), or 0 when the
 *                       lookup returns NULL
 *
 * @return OMPI_SUCCESS on every path.
 */
int ompi_osc_base_get_primitive_type_info(ompi_datatype_t *datatype,
                                          ompi_datatype_t **prim_datatype,
                                          uint32_t *prim_count)
{
    size_t total_bytes, elem_bytes, elem_count;
    ompi_datatype_t *predefined =
        ompi_datatype_get_single_predefined_type_from_args(datatype);

    if (NULL != predefined) {
        ompi_datatype_type_size(datatype, &total_bytes);
        ompi_datatype_type_size(predefined, &elem_bytes);
        elem_count = total_bytes / elem_bytes;
#if OPAL_ENABLE_DEBUG
        /* the datatype must be a whole number of predefined elements */
        assert(0 == (total_bytes % elem_bytes));
#endif  /* OPAL_ENABLE_DEBUG */

        /* The count was computed as a size_t; hand it back as a uint32_t. */
        *prim_datatype = predefined;
        *prim_count = (uint32_t)elem_count;
    } else {
        /* No single predefined type: report an empty description. */
        *prim_count = 0;
    }

    return OMPI_SUCCESS;
}
/**
 * Combined receive/send: post a non-blocking receive, perform a
 * standard-mode send, then wait for the receive to complete.
 *
 * @param sendbuf,scount,sdatatype  data to send
 * @param dest,stag                 destination rank and tag of the send
 * @param recvbuf,rcount,rdatatype  where to receive
 * @param source,rtag               source rank and tag of the receive
 * @param comm                      communicator used for both operations
 * @param status                    receives the status of the completed
 *                                  receive, or only MPI_ERROR on failure;
 *                                  may be MPI_STATUS_IGNORE
 *
 * @return MPI_SUCCESS, or the error code of the first step that failed.
 */
int ompi_coll_base_sendrecv_actual( const void* sendbuf, size_t scount,
                                    ompi_datatype_t* sdatatype,
                                    int dest, int stag,
                                    void* recvbuf, size_t rcount,
                                    ompi_datatype_t* rdatatype,
                                    int source, int rtag,
                                    struct ompi_communicator_t* comm,
                                    ompi_status_public_t* status )
{
    int err, line = 0;
    ompi_request_t *req;
    ompi_status_public_t rstatus;

    /* Post the receive first so it is already pending when the send runs. */
    err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag,
                              comm, &req));
    if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }

    /* Send our data to the peer. */
    err = MCA_PML_CALL(send( sendbuf, scount, sdatatype, dest, stag,
                             MCA_PML_BASE_SEND_STANDARD, comm));
    if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }

    /* Complete the receive posted above. */
    err = ompi_request_wait( &req, &rstatus);
    if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }

    if (MPI_STATUS_IGNORE != status) {
        *status = rstatus;
    }

    return (MPI_SUCCESS);

 error_handler:
    /* One of the three steps failed; no receive status is available, so
     * only the error code is reported back through status. */
    OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n",
                  __FILE__, line, err));
    (void)line;  /* silence compiler warning when OPAL_OUTPUT compiles away */
    if (MPI_STATUS_IGNORE != status) {
        status->MPI_ERROR = err;
    }
    return (err);
}
/**
 * Reduce using the binary tree cached in the tuned module.
 *
 * Refreshes the cached tree for this root, derives the number of elements
 * per segment from segsize and the datatype size, then delegates to
 * ompi_coll_tuned_reduce_generic().
 *
 * @param segsize               segment size passed to
 *                              COLL_TUNED_COMPUTED_SEGCOUNT
 * @param max_outstanding_reqs  forwarded unchanged to the generic reduce
 *
 * @return whatever ompi_coll_tuned_reduce_generic() returns.
 */
int ompi_coll_tuned_reduce_intra_binary( void *sendbuf, void *recvbuf,
                                         int count, ompi_datatype_t* datatype,
                                         ompi_op_t* op, int root,
                                         ompi_communicator_t* comm,
                                         mca_coll_base_module_t *module,
                                         uint32_t segsize,
                                         int max_outstanding_reqs )
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    mca_coll_tuned_comm_t *tuned_cache = tuned_module->tuned_data;
    size_t type_bytes;
    int seg_elems = count;   /* default: the whole message is one segment */

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binary rank %d ss %5d",
                 ompi_comm_rank(comm), segsize));

    COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root );

    /* Work out how many elements travel in each operation. */
    ompi_datatype_type_size( datatype, &type_bytes );
    COLL_TUNED_COMPUTED_SEGCOUNT( segsize, type_bytes, seg_elems );

    return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype,
                                           op, root, comm, module,
                                           tuned_cache->cached_bintree,
                                           seg_elems, max_outstanding_reqs );
}
/**
 * Monitoring wrapper around the real non-blocking broadcast.
 *
 * On the root, records count * type-size bytes against every other rank of
 * the communicator whose world rank can be resolved, plus one one-to-all
 * entry covering all comm_size - 1 peers. Every rank then forwards the call
 * to the wrapped coll_ibcast.
 *
 * @return the value returned by the wrapped coll_ibcast.
 */
int mca_coll_monitoring_ibcast(void *buff, int count,
                               struct ompi_datatype_t *datatype,
                               int root,
                               struct ompi_communicator_t *comm,
                               ompi_request_t ** request,
                               mca_coll_base_module_t *module)
{
    mca_coll_monitoring_module_t *monitoring_module = (mca_coll_monitoring_module_t*) module;
    const int nprocs = ompi_comm_size(comm);
    size_t elem_bytes, msg_bytes;

    ompi_datatype_type_size(datatype, &elem_bytes);
    msg_bytes = count * elem_bytes;

    if( ompi_comm_rank(comm) == root ) {
        int peer, world_rank;

        mca_common_monitoring_coll_o2a(msg_bytes * (nprocs - 1), monitoring_module->data);

        for( peer = 0; peer < nprocs; ++peer ) {
            /* Skip ourselves, and skip peers whose MPI_COMM_WORLD rank
             * cannot be resolved (lookup did not return OPAL_SUCCESS). */
            if( peer != root &&
                OPAL_SUCCESS == mca_common_monitoring_get_world_rank(peer, comm, &world_rank) ) {
                mca_common_monitoring_record_coll(world_rank, msg_bytes);
            }
        }
    }

    return monitoring_module->real.coll_ibcast(buff, count, datatype, root, comm, request,
                                               monitoring_module->real.coll_ibcast_module);
}
/**
 * Monitoring wrapper around the real non-blocking reduce.
 *
 * Only the root records anything: count * type-size bytes for each other
 * rank whose world rank can be resolved through comm->c_remote_group,
 * followed by one all-to-one entry covering comm_size - 1 peers. The call is
 * then forwarded to the wrapped coll_ireduce on every rank.
 *
 * @return the value returned by the wrapped coll_ireduce.
 */
int mca_coll_monitoring_ireduce(const void *sbuf, void *rbuf, int count,
                                struct ompi_datatype_t *dtype,
                                struct ompi_op_t *op,
                                int root,
                                struct ompi_communicator_t *comm,
                                ompi_request_t ** request,
                                mca_coll_base_module_t *module)
{
    mca_coll_monitoring_module_t *monitoring_module = (mca_coll_monitoring_module_t*) module;

    if( ompi_comm_rank(comm) == root ) {
        const int nprocs = ompi_comm_size(comm);
        size_t elem_bytes, msg_bytes;
        int peer, world_rank;

        ompi_datatype_type_size(dtype, &elem_bytes);
        msg_bytes = count * elem_bytes;

        for( peer = 0; peer < nprocs; ++peer ) {
            /* Nothing is exchanged with ourselves; peers whose
             * MPI_COMM_WORLD rank cannot be resolved are not recorded. */
            if( peer != root &&
                OPAL_SUCCESS == mca_common_monitoring_get_world_rank(peer, comm->c_remote_group, &world_rank) ) {
                mca_common_monitoring_record_coll(world_rank, msg_bytes);
            }
        }

        mca_common_monitoring_coll_a2o(msg_bytes * (nprocs - 1), monitoring_module->data);
    }

    return monitoring_module->real.coll_ireduce(sbuf, rbuf, count, dtype, op, root, comm, request,
                                                monitoring_module->real.coll_ireduce_module);
}
/**
 * Reduce using the in-order binomial tree cached in the base module.
 *
 * Refreshes the cached tree for this root, derives the per-segment element
 * count from segsize and the datatype size, then delegates to
 * ompi_coll_base_reduce_generic().
 *
 * @param segsize               segment size passed to
 *                              COLL_BASE_COMPUTED_SEGCOUNT
 * @param max_outstanding_reqs  forwarded unchanged to the generic reduce
 *
 * @return whatever ompi_coll_base_reduce_generic() returns.
 */
int ompi_coll_base_reduce_intra_binomial( const void *sendbuf, void *recvbuf,
                                          int count, ompi_datatype_t* datatype,
                                          ompi_op_t* op, int root,
                                          ompi_communicator_t* comm,
                                          mca_coll_base_module_t *module,
                                          uint32_t segsize,
                                          int max_outstanding_reqs )
{
    mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
    mca_coll_base_comm_t *base_cache = base_module->base_data;
    size_t type_bytes;
    int seg_elems = count;   /* default: the whole message is one segment */

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_binomial rank %d ss %5d",
                 ompi_comm_rank(comm), segsize));

    COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );

    /* Work out how many elements travel in each operation. */
    ompi_datatype_type_size( datatype, &type_bytes );
    COLL_BASE_COMPUTED_SEGCOUNT( segsize, type_bytes, seg_elems );

    return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
                                          op, root, comm, module,
                                          base_cache->cached_in_order_bmtree,
                                          seg_elems, max_outstanding_reqs );
}
/**
 * Monitoring wrapper around the real non-blocking alltoallv.
 *
 * For every peer other than the caller, records scounts[peer] * size of
 * sdtype bytes when the peer's world rank can be resolved through
 * comm->c_remote_group. The sum of the recorded sizes is reported as one
 * all-to-all entry, then the call is forwarded to the wrapped
 * coll_ialltoallv.
 *
 * @return the value returned by the wrapped coll_ialltoallv.
 */
int mca_coll_monitoring_ialltoallv(const void *sbuf, const int *scounts,
                                   const int *sdisps,
                                   struct ompi_datatype_t *sdtype,
                                   void *rbuf, const int *rcounts,
                                   const int *rdisps,
                                   struct ompi_datatype_t *rdtype,
                                   struct ompi_communicator_t *comm,
                                   ompi_request_t ** request,
                                   mca_coll_base_module_t *module)
{
    mca_coll_monitoring_module_t *monitoring_module = (mca_coll_monitoring_module_t*) module;
    const int nprocs = ompi_comm_size(comm);
    const int self = ompi_comm_rank(comm);
    size_t elem_bytes, msg_bytes, recorded_bytes = 0;
    int peer, world_rank;

    ompi_datatype_type_size(sdtype, &elem_bytes);

    for( peer = 0; peer < nprocs; ++peer ) {
        if( peer == self ) {
            continue;   /* nothing is exchanged with ourselves */
        }
        msg_bytes = scounts[peer] * elem_bytes;
        /* Peers whose MPI_COMM_WORLD rank cannot be resolved are neither
         * recorded nor counted in the aggregate. */
        if( OPAL_SUCCESS != mca_common_monitoring_get_world_rank(peer, comm->c_remote_group, &world_rank) ) {
            continue;
        }
        mca_common_monitoring_record_coll(world_rank, msg_bytes);
        recorded_bytes += msg_bytes;
    }

    mca_common_monitoring_coll_a2a(recorded_bytes, monitoring_module->data);

    return monitoring_module->real.coll_ialltoallv(sbuf, scounts, sdisps, sdtype,
                                                   rbuf, rcounts, rdisps, rdtype,
                                                   comm, request,
                                                   monitoring_module->real.coll_ialltoallv_module);
}
/**
 * Broadcast using the binomial tree cached in the tuned module.
 *
 * Refreshes the cached tree for this root, derives the per-segment element
 * count from segsize and the datatype size, logs the result, and delegates
 * to ompi_coll_tuned_bcast_intra_generic().
 *
 * @return whatever ompi_coll_tuned_bcast_intra_generic() returns.
 */
int ompi_coll_tuned_bcast_intra_binomial( void* buffer,
                                          int count,
                                          struct ompi_datatype_t* datatype,
                                          int root,
                                          struct ompi_communicator_t* comm,
                                          mca_coll_base_module_t *module,
                                          uint32_t segsize )
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    mca_coll_tuned_comm_t *tuned_cache = tuned_module->tuned_data;
    size_t type_bytes;
    int seg_elems = count;   /* default: the whole message is one segment */

    COLL_TUNED_UPDATE_BMTREE( comm, tuned_module, root );

    /* Work out how many elements travel in each operation. */
    ompi_datatype_type_size( datatype, &type_bytes );
    COLL_TUNED_COMPUTED_SEGCOUNT( segsize, type_bytes, seg_elems );

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d",
                 ompi_comm_rank(comm), segsize, (unsigned long)type_bytes, seg_elems));

    return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
                                                seg_elems, tuned_cache->cached_bmtree );
}
/**
 * Put origin_count elements of origin_dt into the target's window via PtlPut.
 *
 * Only contiguous origin and target layouts are handled; anything else is
 * rejected with OMPI_ERR_NOT_SUPPORTED. The remote offset is the target's
 * displacement unit (get_displacement()) multiplied by target_disp.
 *
 * @return OMPI_SUCCESS when PtlPut accepted the operation,
 *         OMPI_ERR_NOT_SUPPORTED for non-contiguous layouts, otherwise the
 *         code returned by ompi_datatype_type_size() or PtlPut().
 */
int ompi_osc_portals4_put(void *origin_addr,
                          int origin_count,
                          struct ompi_datatype_t *origin_dt,
                          int target,
                          OPAL_PTRDIFF_TYPE target_disp,
                          int target_count,
                          struct ompi_datatype_t *target_dt,
                          struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_handle_md_t md_h;
    void *md_base;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Put: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    }

    /* Resolve the transfer length before touching opcount, so that a failure
     * here does not leave the counter incremented for a put that was never
     * issued. */
    ret = ompi_datatype_type_size(origin_dt, &length);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }
    length *= origin_count;

    (void)opal_atomic_add_64(&module->opcount, 1);

    ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);

    /* The local offset handed to PtlPut is relative to the base of the
     * memory descriptor that covers origin_addr. */
    ret = PtlPut(md_h,
                 (ptl_size_t) ((char*) origin_addr - (char*) md_base),
                 length,
                 PTL_ACK_REQ,
                 peer,
                 module->pt_idx,
                 module->match_bits,
                 offset,
                 NULL,
                 0);
    if (OMPI_SUCCESS != ret) {
        /* NOTE(review): opcount stays incremented on this path; confirm
         * whether a failed PtlPut should roll it back. */
        return ret;
    }

    return OMPI_SUCCESS;
}
int ompi_osc_ucx_get(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win) { ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target); ucp_rkey_h rkey; ptrdiff_t origin_lb, origin_extent, target_lb, target_extent; bool is_origin_contig = false, is_target_contig = false; ucs_status_t status; int ret = OMPI_SUCCESS; ret = check_sync_state(module, target, false); if (ret != OMPI_SUCCESS) { return ret; } if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { status = get_dynamic_win_info(remote_addr, module, ep, target); if (status != UCS_OK) { return OMPI_ERROR; } } rkey = (module->win_info_array[target]).rkey; ompi_datatype_get_true_extent(origin_dt, &origin_lb, &origin_extent); ompi_datatype_get_true_extent(target_dt, &target_lb, &target_extent); is_origin_contig = ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count); is_target_contig = ompi_datatype_is_contiguous_memory_layout(target_dt, target_count); if (is_origin_contig && is_target_contig) { /* fast path */ size_t origin_len; ompi_datatype_type_size(origin_dt, &origin_len); origin_len *= origin_count; status = ucp_get_nbi(ep, (void *)((intptr_t)origin_addr + origin_lb), origin_len, remote_addr + target_lb, rkey); if (status != UCS_OK && status != UCS_INPROGRESS) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: ucp_get_nbi failed: %d\n", __FILE__, __LINE__, status); return OMPI_ERROR; } return incr_and_check_ops_num(module, target, ep); } else { return ddt_put_get(module, origin_addr, origin_count, origin_dt, is_origin_contig, origin_lb, target, ep, remote_addr, rkey, target_count, target_dt, is_target_contig, target_lb, true); } }
/**
 * Fortran binding: get *len INTEGER elements from PE *pe.
 *
 * The byte length is *len multiplied by the size of ompi_mpi_integer; the
 * SPML get is issued on oshmem_ctx_default with (source, bytes, target, pe).
 */
void shmem_integer_get_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *len, MPI_Fint *pe)
{
    size_t elem_bytes = 0;
    size_t nbytes;

    ompi_datatype_type_size(&ompi_mpi_integer.dt, &elem_bytes);
    nbytes = OMPI_FINT_2_INT(*len) * elem_bytes;

    MCA_SPML_CALL(get(oshmem_ctx_default,
                      FPTR_2_VOID_PTR(source),
                      nbytes,
                      FPTR_2_VOID_PTR(target),
                      OMPI_FINT_2_INT(*pe)));
}
/**
 * Fortran binding: get *len CHARACTER elements from PE *pe.
 *
 * The byte length is *len multiplied by the size of ompi_mpi_character; the
 * SPML get is called with (source, bytes, target, pe).
 */
void shmem_character_get_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *len, MPI_Fint *pe)
{
    size_t elem_bytes = 0;
    size_t nbytes;

    ompi_datatype_type_size(&ompi_mpi_character.dt, &elem_bytes);
    nbytes = OMPI_FINT_2_INT(*len) * elem_bytes;

    MCA_SPML_CALL(get(FPTR_2_VOID_PTR(source),
                      nbytes,
                      FPTR_2_VOID_PTR(target),
                      OMPI_FINT_2_INT(*pe)));
}
/**
 * Fortran binding: put *length LOGICAL elements to PE *pe.
 *
 * The byte length is *length multiplied by the size of ompi_mpi_logical; the
 * SPML put is issued on oshmem_ctx_default with (target, bytes, source, pe).
 */
void shmem_logical_put_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *length, MPI_Fint *pe)
{
    size_t elem_bytes = 0;
    size_t nbytes;

    ompi_datatype_type_size(&ompi_mpi_logical.dt, &elem_bytes);
    nbytes = OMPI_FINT_2_INT(*length) * elem_bytes;

    MCA_SPML_CALL(put(oshmem_ctx_default,
                      FPTR_2_VOID_PTR(target),
                      nbytes,
                      FPTR_2_VOID_PTR(source),
                      OMPI_FINT_2_INT(*pe)));
}
/**
 * Fortran binding: non-blocking put of *length COMPLEX elements to PE *pe.
 *
 * The byte length is *length multiplied by the size of ompi_mpi_cplex; the
 * SPML put_nb is called with (target, bytes, source, pe) and a NULL final
 * argument.
 */
void shmem_complex_put_nbi_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *length, MPI_Fint *pe)
{
    size_t elem_bytes = 0;
    size_t nbytes;

    ompi_datatype_type_size(&ompi_mpi_cplex.dt, &elem_bytes);
    nbytes = OMPI_FINT_2_INT(*length) * elem_bytes;

    MCA_SPML_CALL(put_nb(FPTR_2_VOID_PTR(target),
                         nbytes,
                         FPTR_2_VOID_PTR(source),
                         OMPI_FINT_2_INT(*pe),
                         NULL));
}
/**
 * Fortran binding: non-blocking put of *length DOUBLE PRECISION elements to
 * PE *pe.
 *
 * The byte length is *length multiplied by the size of ompi_mpi_dblprec; the
 * SPML put_nb is called with (target, bytes, source, pe) and a NULL final
 * argument.
 */
void shmem_double_put_nbi_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *length, MPI_Fint *pe)
{
    size_t elem_bytes = 0;
    size_t nbytes;

    ompi_datatype_type_size(&ompi_mpi_dblprec.dt, &elem_bytes);
    nbytes = OMPI_FINT_2_INT(*length) * elem_bytes;

    MCA_SPML_CALL(put_nb(FPTR_2_VOID_PTR(target),
                         nbytes,
                         FPTR_2_VOID_PTR(source),
                         OMPI_FINT_2_INT(*pe),
                         NULL));
}
/**
 * Dynamic decision function for allgatherv.
 *
 * Selection order:
 *   1. file-based rules (com_rules[ALLGATHERV]), looked up with the total
 *      message size sum(rcounts[i]) * size of sdtype;
 *   2. the user-forced algorithm, if one is set;
 *   3. the fixed decision function.
 *
 * @return the result of whichever allgatherv implementation is selected.
 */
int ompi_coll_tuned_allgatherv_intra_dec_dynamic(const void *sbuf, int scount,
                                                 struct ompi_datatype_t *sdtype,
                                                 void* rbuf, const int *rcounts,
                                                 const int *rdispls,
                                                 struct ompi_datatype_t *rdtype,
                                                 struct ompi_communicator_t *comm,
                                                 mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;

    OPAL_OUTPUT((ompi_coll_tuned_stream,
                 "ompi_coll_tuned_allgatherv_intra_dec_dynamic"));

    if (tuned_module->com_rules[ALLGATHERV]) {
        /* File-based rules exist: compute the message size they key on. */
        int alg, faninout, segsize, ignoreme, peer;
        const int nprocs = ompi_comm_size(comm);
        size_t type_bytes, total_bytes = 0;

        ompi_datatype_type_size (sdtype, &type_bytes);
        for (peer = 0; peer < nprocs; peer++) {
            total_bytes += type_bytes * rcounts[peer];
        }

        alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLGATHERV],
                                                        total_bytes, &faninout, &segsize, &ignoreme);
        if (alg) {
            /* the rules name an algorithm for this message size */
            return ompi_coll_tuned_allgatherv_intra_do_this (sbuf, scount, sdtype,
                                                             rbuf, rcounts,
                                                             rdispls, rdtype,
                                                             comm, module,
                                                             alg, faninout, segsize);
        }
    }

    /* No applicable file-based rule. */
    if (tuned_module->user_forced[ALLGATHERV].algorithm) {
        /* honour the algorithm forced by the user */
        return ompi_coll_tuned_allgatherv_intra_do_this(sbuf, scount, sdtype,
                                                        rbuf, rcounts, rdispls, rdtype,
                                                        comm, module,
                                                        tuned_module->user_forced[ALLGATHERV].algorithm,
                                                        tuned_module->user_forced[ALLGATHERV].tree_fanout,
                                                        tuned_module->user_forced[ALLGATHERV].segsize);
    }

    /* Fall back to the built-in decision. */
    return ompi_coll_tuned_allgatherv_intra_dec_fixed (sbuf, scount, sdtype,
                                                       rbuf, rcounts,
                                                       rdispls, rdtype,
                                                       comm, module);
}
/**
 * Try to push a small message straight out through the BTL's sendi entry
 * point.
 *
 * Gives up with OMPI_ERR_NOT_AVAILABLE when the next eager BTL has no
 * btl_sendi, or when count * datatype size exceeds 256 bytes (an arbitrary
 * threshold). Otherwise a match header is built and handed to
 * mca_bml_base_sendi() together with a convertor prepared for the
 * destination process.
 *
 * @return the number of bytes sent (packed size, 0 for an empty message) on
 *         success, otherwise the error code from mca_bml_base_sendi() or
 *         OMPI_ERR_NOT_AVAILABLE.
 */
static inline int mca_pml_ob1_send_inline (const void *buf, size_t count,
                                           ompi_datatype_t * datatype,
                                           int dst, int tag, int16_t seqn,
                                           ompi_proc_t *dst_proc, mca_bml_base_endpoint_t* endpoint,
                                           ompi_communicator_t * comm)
{
    const int has_payload = (count > 0);
    mca_pml_ob1_match_hdr_t match;
    mca_bml_base_btl_t *bml_btl;
    opal_convertor_t convertor;
    size_t size;
    int rc;

    bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
    if (NULL == bml_btl->btl->btl_sendi) {
        return OMPI_ERR_NOT_AVAILABLE;
    }

    ompi_datatype_type_size (datatype, &size);
    if ((size * count) > 256) {  /* some random number */
        return OMPI_ERR_NOT_AVAILABLE;
    }

    if (!has_payload) {
        /* NOTE(review): the convertor is left unconstructed on this path
         * yet its address is still passed to mca_bml_base_sendi() with
         * size 0 -- confirm no BTL reads it in that case. */
        size = 0;
    } else {
        /* construct just enough of the convertor to keep
         * opal_convertor_cleanup from faulting */
        OBJ_CONSTRUCT(&convertor, opal_convertor_t);

        /* clone the destination's convertor and prepare it for this
         * datatype/count/buffer */
        opal_convertor_copy_and_prepare_for_send (dst_proc->super.proc_convertor,
                                                  (const struct opal_datatype_t *) datatype,
                                                  count, buf, 0, &convertor);
        opal_convertor_get_packed_size (&convertor, &size);
    }

    mca_pml_ob1_match_hdr_prepare (&match, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
                                   comm->c_contextid, comm->c_my_rank,
                                   tag, seqn);

    ob1_hdr_hton(&match, MCA_PML_OB1_HDR_TYPE_MATCH, dst_proc);

    /* attempt the immediate send */
    rc = mca_bml_base_sendi (bml_btl, &convertor, &match, OMPI_PML_OB1_MATCH_HDR_LEN,
                             size, MCA_BTL_NO_ORDER, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP,
                             MCA_PML_OB1_HDR_TYPE_MATCH, NULL);

    if (has_payload) {
        opal_convertor_cleanup (&convertor);
    }

    return OPAL_UNLIKELY(OMPI_SUCCESS != rc) ? rc : (int) size;
}
/**
 * Monitoring wrapper around the real non-blocking neighbor alltoall.
 *
 * Reads the Cartesian topology attached to comm. In every dimension larger
 * than 1 the two neighbours given by a shift of 1 are looked up; each
 * neighbour that is not MPI_PROC_NULL and whose world rank can be resolved
 * is charged scount * size of sdtype bytes. A periodic dimension of size 1
 * is skipped (exchange with self). The recorded total is reported as one
 * all-to-all entry before the call is forwarded.
 *
 * @return the value returned by the wrapped coll_ineighbor_alltoall.
 */
int mca_coll_monitoring_ineighbor_alltoall(const void *sbuf, int scount,
                                           struct ompi_datatype_t *sdtype,
                                           void *rbuf, int rcount,
                                           struct ompi_datatype_t *rdtype,
                                           struct ompi_communicator_t *comm,
                                           ompi_request_t ** request,
                                           mca_coll_base_module_t *module)
{
    mca_coll_monitoring_module_t *monitoring_module = (mca_coll_monitoring_module_t*) module;
    const mca_topo_base_comm_cart_t *cart = comm->c_topo->mtc.cart;
    size_t elem_bytes, msg_bytes, recorded_bytes = 0;
    int dim, which, world_rank;
    int neighbor[2];   /* [0] = source side, [1] = destination side */

    ompi_datatype_type_size(sdtype, &elem_bytes);
    msg_bytes = scount * elem_bytes;

    for( dim = 0; dim < cart->ndims; ++dim ) {
        neighbor[0] = MPI_PROC_NULL;
        neighbor[1] = MPI_PROC_NULL;

        if (cart->dims[dim] > 1) {
            mca_topo_base_cart_shift (comm, dim, 1, &neighbor[0], &neighbor[1]);
        } else if (1 == cart->dims[dim] && cart->periods[dim]) {
            /* the only neighbour would be ourselves: nothing to record */
            continue;
        }

        for( which = 0; which < 2; ++which ) {
            if (MPI_PROC_NULL == neighbor[which]) {
                continue;
            }
            /* Neighbours whose MPI_COMM_WORLD rank cannot be resolved are
             * neither recorded nor counted in the aggregate. */
            if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(neighbor[which], comm, &world_rank) ) {
                mca_common_monitoring_record_coll(world_rank, msg_bytes);
                recorded_bytes += msg_bytes;
            }
        }
    }

    mca_common_monitoring_coll_a2a(recorded_bytes, monitoring_module->data);

    return monitoring_module->real.coll_ineighbor_alltoall(sbuf, scount, sdtype,
                                                           rbuf, rcount, rdtype,
                                                           comm, request,
                                                           monitoring_module->real.coll_ineighbor_alltoall_module);
}
/*
 *  reduce_scatter_intra_dec
 *
 *  Function:   - selects the reduce_scatter algorithm to use
 *  Accepts:    - same arguments as MPI_Reduce_scatter()
 *  Returns:    - MPI_SUCCESS or error code (passed from
 *                the reduce scatter implementation)
 *  Note:       - a non-commutative op, or any zero entry in rcounts, forces
 *                the nonoverlapping algorithm because the other algorithms
 *                do not currently handle zero counts.
 *              - otherwise recursive halving is used for small messages,
 *                for mid-size messages on power-of-two communicators, and
 *                when comm_size >= a * total_bytes + b; ring is used for
 *                everything else.
 */
int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
                                                    int *rcounts,
                                                    struct ompi_datatype_t *dtype,
                                                    struct ompi_op_t *op,
                                                    struct ompi_communicator_t *comm,
                                                    mca_coll_base_module_t *module)
{
    /* coefficients of the linear comm_size / message-size cut-off */
    const double a = 0.0012;
    const double b = 8.0;
    const size_t small_message_size = 12 * 1024;
    const size_t large_message_size = 256 * 1024;
    size_t total_message_size = 0, dsize;
    bool saw_zero_count = false;
    int comm_size, peer, pow2;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_scatter_intra_dec_fixed"));

    comm_size = ompi_comm_size(comm);

    /* the decision is based on the total payload in bytes */
    ompi_datatype_type_size(dtype, &dsize);

    /* first accumulate the element count, noting any empty contribution */
    for (peer = 0; peer < comm_size; peer++) {
        total_message_size += rcounts[peer];
        if (0 == rcounts[peer]) {
            saw_zero_count = true;
        }
    }

    if( !ompi_op_is_commute(op) || saw_zero_count ) {
        return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts,
                                                                    dtype, op,
                                                                    comm, module);
    }

    /* elements -> bytes */
    total_message_size *= dsize;

    /* smallest power of two that is >= comm_size */
    pow2 = opal_next_poweroftwo_inclusive (comm_size);

    if ((total_message_size <= small_message_size) ||
        ((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
        (comm_size >= a * total_message_size + b)) {
        return ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
                                                                           dtype, op,
                                                                           comm, module);
    }

    return ompi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
                                                     dtype, op,
                                                     comm, module);
}
/**
 * Fixed decision function for allgatherv.
 *
 *  - exactly two processes: dedicated two-process algorithm;
 *  - total size (sum(rcounts[i]) * size of sdtype) below 50000 bytes: Bruck;
 *  - otherwise ring for odd communicator sizes, neighbor exchange for even.
 *
 * @return the result of the selected allgatherv implementation.
 */
int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount,
                                               struct ompi_datatype_t *sdtype,
                                               void* rbuf, int *rcounts,
                                               int *rdispls,
                                               struct ompi_datatype_t *rdtype,
                                               struct ompi_communicator_t *comm,
                                               mca_coll_base_module_t *module)
{
    const int nprocs = ompi_comm_size(comm);
    size_t type_bytes, total_bytes = 0;
    int peer;

    /* two processes get their own algorithm */
    if (2 == nprocs) {
        return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype,
                                                           rbuf, rcounts, rdispls, rdtype,
                                                           comm, module);
    }

    /* total amount of data gathered, in bytes */
    ompi_datatype_type_size(sdtype, &type_bytes);
    for (peer = 0; peer < nprocs; peer++) {
        total_bytes += type_bytes * rcounts[peer];
    }

    OPAL_OUTPUT((ompi_coll_tuned_stream,
                 "ompi_coll_tuned_allgatherv_intra_dec_fixed"
                 " rank %d com_size %d msg_length %lu",
                 ompi_comm_rank(comm), nprocs, (unsigned long)total_bytes));

    /* thresholds mirror the allgather decision */
    if (total_bytes < 50000) {
        return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype,
                                                      rbuf, rcounts, rdispls, rdtype,
                                                      comm, module);
    }

    if (nprocs % 2) {
        return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype,
                                                     rbuf, rcounts, rdispls, rdtype,
                                                     comm, module);
    }

    return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
                                                             rbuf, rcounts, rdispls, rdtype,
                                                             comm, module);
}
/**
 * Dynamic decision function for allgather.
 *
 * Selection order:
 *   1. file-based rules (com_rules[ALLGATHER]), looked up with
 *      size of sdtype * comm size * scount;
 *   2. the user-forced algorithm, if one is set;
 *   3. the fixed decision function.
 *
 * @return the result of whichever allgather implementation is selected.
 */
int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount,
                                                struct ompi_datatype_t *sdtype,
                                                void* rbuf, int rcount,
                                                struct ompi_datatype_t *rdtype,
                                                struct ompi_communicator_t *comm,
                                                mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    mca_coll_tuned_comm_t *tuned_cache = tuned_module->tuned_data;

    OPAL_OUTPUT((ompi_coll_tuned_stream,
                 "ompi_coll_tuned_allgather_intra_dec_dynamic"));

    if (tuned_cache->com_rules[ALLGATHER]) {
        /* File-based rules exist: compute the message size they key on. */
        int alg, faninout, segsize, ignoreme, nprocs;
        size_t msg_bytes;

        ompi_datatype_type_size (sdtype, &msg_bytes);
        nprocs = ompi_comm_size(comm);
        msg_bytes *= (ptrdiff_t)nprocs * (ptrdiff_t)scount;

        alg = ompi_coll_tuned_get_target_method_params (tuned_cache->com_rules[ALLGATHER],
                                                        msg_bytes, &faninout, &segsize, &ignoreme);
        if (alg) {
            /* the rules name an algorithm for this message size */
            return ompi_coll_tuned_allgather_intra_do_this (sbuf, scount, sdtype,
                                                            rbuf, rcount, rdtype,
                                                            comm, module,
                                                            alg, faninout, segsize);
        }
    }

    /* No applicable file-based rule. */
    if (tuned_cache->user_forced[ALLGATHER].algorithm) {
        /* honour the algorithm forced by the user */
        return ompi_coll_tuned_allgather_intra_do_forced (sbuf, scount, sdtype,
                                                          rbuf, rcount, rdtype,
                                                          comm, module);
    }

    /* Fall back to the built-in decision. */
    return ompi_coll_tuned_allgather_intra_dec_fixed (sbuf, scount, sdtype,
                                                      rbuf, rcount, rdtype,
                                                      comm, module);
}
/**
 * Dynamic decision function for gather.
 *
 * Selection order:
 *   1. file-based rules (com_rules[GATHER]), looked up with
 *      size of sdtype * comm size;
 *   2. the user-forced algorithm, if one is set;
 *   3. the fixed decision function.
 *
 * @return the result of whichever gather implementation is selected.
 */
int ompi_coll_tuned_gather_intra_dec_dynamic(const void *sbuf, int scount,
                                             struct ompi_datatype_t *sdtype,
                                             void* rbuf, int rcount,
                                             struct ompi_datatype_t *rdtype,
                                             int root,
                                             struct ompi_communicator_t *comm,
                                             mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;

    OPAL_OUTPUT((ompi_coll_tuned_stream,
                 "ompi_coll_tuned_gather_intra_dec_dynamic"));

    /* Are there file-based rules for gather? */
    if (tuned_module->com_rules[GATHER]) {
        int alg, faninout, segsize, max_requests;
        const int nprocs = ompi_comm_size(comm);
        size_t msg_bytes;

        ompi_datatype_type_size (sdtype, &msg_bytes);
        /* NOTE(review): the lookup size does not include scount, so it is
         * independent of the element count -- confirm this is intended. */
        msg_bytes *= nprocs;

        alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[GATHER],
                                                        msg_bytes, &faninout, &segsize, &max_requests);
        if (alg) {
            /* the rules name an algorithm for this message size */
            return ompi_coll_tuned_gather_intra_do_this (sbuf, scount, sdtype,
                                                         rbuf, rcount, rdtype,
                                                         root, comm, module,
                                                         alg, faninout, segsize);
        }
    }

    /* No applicable rule: honour a user-forced algorithm if there is one. */
    if (tuned_module->user_forced[GATHER].algorithm) {
        return ompi_coll_tuned_gather_intra_do_this(sbuf, scount, sdtype,
                                                    rbuf, rcount, rdtype,
                                                    root, comm, module,
                                                    tuned_module->user_forced[GATHER].algorithm,
                                                    tuned_module->user_forced[GATHER].tree_fanout,
                                                    tuned_module->user_forced[GATHER].segsize);
    }

    /* Fall back to the built-in decision. */
    return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype,
                                                   rbuf, rcount, rdtype,
                                                   root, comm, module);
}
/**
 * Fortran binding for the INTEGER swap.
 *
 * Issues the atomic cswap with a NULL compare argument, the size of
 * ompi_mpi_integer as operand size, and a local MPI_Fint as the output slot.
 *
 * @return the value left in the output slot by cswap (0 if it was not
 *         written).
 */
MPI_Fint shmem_swap_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T value, MPI_Fint *pe)
{
    MPI_Fint previous = 0;
    size_t elem_bytes = 0;

    ompi_datatype_type_size(&ompi_mpi_integer.dt, &elem_bytes);

    MCA_ATOMIC_CALL(cswap(FPTR_2_VOID_PTR(target),
                          (void *)&previous,
                          NULL,
                          FPTR_2_VOID_PTR(value),
                          elem_bytes,
                          OMPI_FINT_2_INT(*pe)));

    return previous;
}
/**
 * Fixed decision function for scatter.
 *
 * The block size is scount * size of sdtype on the root and
 * rcount * size of rdtype elsewhere. Binomial is chosen when the
 * communicator has more than 10 ranks and the block is smaller than
 * 300 bytes; basic linear otherwise.
 *
 * @return the result of the selected scatter implementation.
 */
int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount,
                                            struct ompi_datatype_t *sdtype,
                                            void* rbuf, int rcount,
                                            struct ompi_datatype_t *rdtype,
                                            int root, struct ompi_communicator_t *comm,
                                            mca_coll_base_module_t *module)
{
    const size_t small_block_size = 300;
    const int small_comm_size = 10;
    struct ompi_datatype_t *block_type;
    size_t type_bytes, block_bytes;
    int nprocs, self, block_elems;

    OPAL_OUTPUT((ompi_coll_tuned_stream,
                 "ompi_coll_tuned_scatter_intra_dec_fixed"));

    nprocs = ompi_comm_size(comm);
    self = ompi_comm_rank(comm);

    /* the root sizes the block from its send side, everyone else from the
     * receive side */
    block_type  = (root == self) ? sdtype : rdtype;
    block_elems = (root == self) ? scount : rcount;

    ompi_datatype_type_size(block_type, &type_bytes);
    block_bytes = type_bytes * block_elems;

    if ((nprocs > small_comm_size) && (block_bytes < small_block_size)) {
        return ompi_coll_tuned_scatter_intra_binomial (sbuf, scount, sdtype,
                                                       rbuf, rcount, rdtype,
                                                       root, comm, module);
    }

    return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype,
                                                       rbuf, rcount, rdtype,
                                                       root, comm, module);
}
int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_addr, void *result_addr, struct ompi_datatype_t *dt, int target, ptrdiff_t target_disp, struct ompi_win_t *win) { ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module; ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target); ucp_rkey_h rkey; size_t dt_bytes; ompi_osc_ucx_internal_request_t *req = NULL; int ret = OMPI_SUCCESS; ucs_status_t status; ret = check_sync_state(module, target, false); if (ret != OMPI_SUCCESS) { return ret; } ret = start_atomicity(module, ep, target); if (ret != OMPI_SUCCESS) { return ret; } if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { status = get_dynamic_win_info(remote_addr, module, ep, target); if (status != UCS_OK) { return OMPI_ERROR; } } rkey = (module->win_info_array[target]).rkey; ompi_datatype_type_size(dt, &dt_bytes); memcpy(result_addr, origin_addr, dt_bytes); req = ucp_atomic_fetch_nb(ep, UCP_ATOMIC_FETCH_OP_CSWAP, *(uint64_t *)compare_addr, result_addr, dt_bytes, remote_addr, rkey, req_completion); if (UCS_PTR_IS_PTR(req)) { ucp_request_release(req); } ret = incr_and_check_ops_num(module, target, ep); if (ret != OMPI_SUCCESS) { return ret; } return end_atomicity(module, ep, target); }
/**
 * Dynamic decision function for alltoall.
 *
 * Selection order:
 *   1. file-based rules (com_rules[ALLTOALL]), looked up with
 *      size of sdtype * comm size * scount;
 *   2. the user-forced algorithm, if one is set;
 *   3. the fixed decision function.
 *
 * @return the result of whichever alltoall implementation is selected.
 */
int ompi_coll_tuned_alltoall_intra_dec_dynamic(const void *sbuf, int scount,
                                               struct ompi_datatype_t *sdtype,
                                               void* rbuf, int rcount,
                                               struct ompi_datatype_t *rdtype,
                                               struct ompi_communicator_t *comm,
                                               mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_dynamic"));

    /* Are there file-based rules for alltoall? */
    if (tuned_module->com_rules[ALLTOALL]) {
        /* yes: compute the message size they key on */
        int alg, faninout, segsize, max_requests, nprocs;
        size_t msg_bytes;

        ompi_datatype_type_size (sdtype, &msg_bytes);
        nprocs = ompi_comm_size(comm);
        msg_bytes *= (ptrdiff_t)nprocs * (ptrdiff_t)scount;

        alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLTOALL],
                                                        msg_bytes, &faninout, &segsize, &max_requests);
        if (alg) {
            /* the rules name an algorithm for this message size */
            return ompi_coll_tuned_alltoall_intra_do_this (sbuf, scount, sdtype,
                                                           rbuf, rcount, rdtype,
                                                           comm, module,
                                                           alg, faninout, segsize, max_requests);
        }
    }

    /* No applicable rule: honour a user-forced algorithm if there is one. */
    if (tuned_module->user_forced[ALLTOALL].algorithm) {
        return ompi_coll_tuned_alltoall_intra_do_this(sbuf, scount, sdtype,
                                                      rbuf, rcount, rdtype,
                                                      comm, module,
                                                      tuned_module->user_forced[ALLTOALL].algorithm,
                                                      tuned_module->user_forced[ALLTOALL].tree_fanout,
                                                      tuned_module->user_forced[ALLTOALL].segsize,
                                                      tuned_module->user_forced[ALLTOALL].max_requests);
    }

    /* Fall back to the built-in decision. */
    return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype,
                                                     rbuf, rcount, rdtype,
                                                     comm, module);
}
/* Free the buffers of the first n segments, then the segment array itself. */
static void create_segments_discard( ddt_segment_t* segments, int n )
{
    int i;

    if( NULL == segments ) {
        return;
    }
    for( i = 0; i < n; i++ ) {
        free( segments[i].buffer );
    }
    free( segments );
}

/**
 * Split count elements of datatype into segments of at most segment_length
 * bytes and allocate one buffer of segment_length bytes per segment.
 *
 * The initial guess is ceil(data_size / segment_length) segments. Segment
 * boundaries are found by asking a send convertor to move to the desired
 * position; opal_convertor_set_position() receives the position by address
 * and each segment's size is the distance it actually advanced. When the
 * segments do not add up to the full data size, everything is released and
 * the layout is recomputed with one more segment.
 *
 * @param datatype        datatype being segmented
 * @param count           number of elements of datatype
 * @param segment_length  size in bytes of each segment buffer; must be > 0
 * @param segments        [out] allocated array of segments (owned by the
 *                        caller); NULL on failure
 * @param seg_count       [out] number of entries in *segments; 0 on failure
 *
 * @return 0 on success, -1 when segment_length is 0 or an allocation fails.
 */
static int create_segments( ompi_datatype_t* datatype, int count,
                            size_t segment_length,
                            ddt_segment_t** segments, int* seg_count )
{
    size_t data_size, total_length, position;
    opal_convertor_t* convertor;
    ddt_segment_t* segment;
    int i;

    /* A zero-length segment cannot hold data and would divide by zero. */
    if( 0 == segment_length ) {
        *segments = NULL;
        *seg_count = 0;
        return -1;
    }

    ompi_datatype_type_size( datatype, &data_size );
    data_size *= count;
    *seg_count = data_size / segment_length;
    if( ((*seg_count) * segment_length) != data_size )
        *seg_count += 1;

    for( ;; ) {
        *segments = (ddt_segment_t*)malloc( (*seg_count) * sizeof(ddt_segment_t) );
        /* malloc(0) may legitimately return NULL, so only a non-empty
         * request counts as a failure */
        if( (NULL == *segments) && ((*seg_count) > 0) ) {
            *seg_count = 0;
            return -1;
        }

        convertor = opal_convertor_create( opal_local_arch, 0 );
        if( NULL == convertor ) {
            free( *segments );
            *segments = NULL;
            *seg_count = 0;
            return -1;
        }
        opal_convertor_prepare_for_send( convertor, &(datatype->super), count, NULL );

        position = 0;
        total_length = 0;
        for( i = 0; i < (*seg_count); i++ ) {
            segment = &((*segments)[i]);
            segment->buffer = malloc(segment_length);
            if( NULL == segment->buffer ) {
                /* release everything built so far: i buffers + the array */
                OBJ_RELEASE(convertor);
                create_segments_discard( *segments, i );
                *segments = NULL;
                *seg_count = 0;
                return -1;
            }
            segment->position = position;

            /* Find the end of the segment */
            position += segment_length;
            opal_convertor_set_position( convertor, &position );
            segment->size = position - segment->position;
            total_length += segment->size;
        }
        OBJ_RELEASE(convertor);

        if( total_length == data_size ) {
            return 0;
        }

        /* The segments fell short of the data: retry with one more. */
        create_segments_discard( *segments, *seg_count );
        (*seg_count) += 1;
    }
}
/*
 *  reduce_scatter_intra_dec
 *
 *  Function:   - selects the reduce_scatter algorithm to use
 *  Accepts:    - same arguments as MPI_Reduce_scatter()
 *  Returns:    - MPI_SUCCESS or error code (passed from
 *                the reduce_scatter implementation)
 *  Note:       - selection order is: file-based rules
 *                (com_rules[REDUCESCATTER], looked up with
 *                sum(rcounts[i]) * size of dtype), then the user-forced
 *                algorithm, then the fixed decision function.
 */
int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf,
                                                     int *rcounts,
                                                     struct ompi_datatype_t *dtype,
                                                     struct ompi_op_t *op,
                                                     struct ompi_communicator_t *comm,
                                                     mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    mca_coll_tuned_comm_t *tuned_cache = tuned_module->tuned_data;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_scatter_intra_dec_dynamic"));

    /* Are there file-based rules for reduce_scatter? */
    if (tuned_cache->com_rules[REDUCESCATTER]) {
        /* yes: compute the message size they key on */
        int alg, faninout, segsize, ignoreme, peer;
        const int nprocs = ompi_comm_size(comm);
        int total_elems = 0;
        size_t msg_bytes;

        for (peer = 0; peer < nprocs; peer++) {
            total_elems += rcounts[peer];
        }
        ompi_datatype_type_size (dtype, &msg_bytes);
        msg_bytes *= total_elems;

        alg = ompi_coll_tuned_get_target_method_params (tuned_cache->com_rules[REDUCESCATTER],
                                                        msg_bytes, &faninout, &segsize, &ignoreme);
        if (alg) {
            /* the rules name an algorithm for this message size */
            return ompi_coll_tuned_reduce_scatter_intra_do_this (sbuf, rbuf, rcounts,
                                                                 dtype, op, comm, module,
                                                                 alg, faninout, segsize);
        }
    }

    /* No applicable rule: honour a user-forced algorithm if there is one. */
    if (tuned_cache->user_forced[REDUCESCATTER].algorithm) {
        return ompi_coll_tuned_reduce_scatter_intra_do_forced (sbuf, rbuf, rcounts,
                                                               dtype, op, comm, module);
    }

    /* Fall back to the built-in decision. */
    return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts,
                                                           dtype, op, comm, module);
}
/**
 * Fortran binding: strided get of *len INTEGER elements from PE *pe.
 *
 * Element k is fetched with its own SPML get of one INTEGER: it is read at
 * source + k * (*sst) elements and stored at target + k * (*tst) elements,
 * where an element is the size of ompi_mpi_integer.
 */
void shmem_integer_iget_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *tst, MPI_Fint *sst, MPI_Fint *len, MPI_Fint *pe)
{
    const int nelems = OMPI_FINT_2_INT(*len);
    const int target_stride = OMPI_FINT_2_INT(*tst);
    const int source_stride = OMPI_FINT_2_INT(*sst);
    uint8_t *source_bytes = (uint8_t *)FPTR_2_VOID_PTR(source);
    uint8_t *target_bytes = (uint8_t *)FPTR_2_VOID_PTR(target);
    size_t elem_bytes = 0;
    int k;

    ompi_datatype_type_size(&ompi_mpi_integer.dt, &elem_bytes);

    for (k = 0; k < nelems; k++) {
        /* one element per call; strides are expressed in elements */
        MCA_SPML_CALL(get(source_bytes + k * source_stride * elem_bytes,
                          elem_bytes,
                          target_bytes + k * target_stride * elem_bytes,
                          OMPI_FINT_2_INT(*pe)));
    }
}
/*
 *  allreduce_intra
 *
 *  Function:   - selects the allreduce algorithm to use
 *  Accepts:    - same as MPI_Allreduce()
 *  Returns:    - MPI_SUCCESS or error code
 *  Note:       - messages under 10000 bytes use recursive doubling;
 *              - larger messages with a commutative op and more elements
 *                than ranks use ring, or segmented ring (1 MB segments)
 *                when the message exceeds comm_size segments;
 *              - everything else uses the nonoverlapping algorithm.
 */
int ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
                                               struct ompi_datatype_t *dtype,
                                               struct ompi_op_t *op,
                                               struct ompi_communicator_t *comm,
                                               mca_coll_base_module_t *module)
{
    const size_t intermediate_message = 10000;
    const int nprocs = ompi_comm_size(comm);
    size_t type_bytes, msg_bytes;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_fixed"));

    /**
     * Decision function based on MX results from the Grig cluster at UTK.
     *
     * Linear, recursive doubling and nonoverlapping handle both commutative
     * and non-commutative operations; ring requires a commutative one.
     */
    ompi_datatype_type_size(dtype, &type_bytes);
    msg_bytes = type_bytes * count;

    if (msg_bytes < intermediate_message) {
        return (ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf,
                                                                   count, dtype,
                                                                   op, comm, module));
    }

    if( !ompi_op_is_commute(op) || !(count > nprocs) ) {
        return (ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count,
                                                                dtype, op, comm, module));
    }

    {
        const size_t segment_size = 1 << 20; /* 1 MB */

        if ((nprocs * segment_size >= msg_bytes)) {
            return (ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype,
                                                          op, comm, module));
        }
        return (ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf,
                                                                count, dtype,
                                                                op, comm, module,
                                                                segment_size));
    }
}