Example #1
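/* Handle an unlock request from rank `origin`: record that `count` more
 * incoming operations must arrive before the lock can be released, queue
 * the unlocker, and then try to complete any unlocks that are already
 * satisfied. */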
int
ompi_osc_pt2pt_passive_unlock(ompi_osc_pt2pt_module_t *module,
                              int32_t origin,
                              int32_t count)
{
    ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, origin );
    ompi_osc_pt2pt_pending_lock_t *new_pending = NULL;

    assert(module->p2p_lock_status != 0);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                         "%d: received unlock request from %d with %d requests\n",
                         ompi_comm_rank(module->p2p_comm),
                         origin, count));

    new_pending = OBJ_NEW(ompi_osc_pt2pt_pending_lock_t);
    new_pending->proc = proc;
    new_pending->lock_type = 0;
    OPAL_THREAD_LOCK(&(module->p2p_lock));
    module->p2p_num_pending_in += count;
    opal_list_append(&module->p2p_unlocks_pending, &(new_pending->super));
    OPAL_THREAD_UNLOCK(&(module->p2p_lock));

    return ompi_osc_pt2pt_passive_unlock_complete(module);
}
Example #2
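/* Begin a passive-target access epoch: swap the window from fence mode to
 * a lock access epoch and send the lock request to the target.  The lock
 * is only held once the target acknowledges it (the unlock path waits for
 * that ack). */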
int
ompi_osc_pt2pt_module_lock(int lock_type,
                           int target,
                           int assert,
                           ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
    ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, target );

    assert(lock_type != 0);

    /* set our mode on the window */
    ompi_win_remove_mode(win, OMPI_WIN_FENCE);
    ompi_win_append_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_LOCK_ACCESS);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                         "%d: sending lock request to %d", 
                         ompi_comm_rank(module->p2p_comm),
                         target));
    /* generate a lock request */
    ompi_osc_pt2pt_control_send(module, 
                                proc,
                                OMPI_OSC_PT2PT_HDR_LOCK_REQ,
                                ompi_comm_rank(module->p2p_comm),
                                lock_type);

    /* return */
    return OMPI_SUCCESS;
}
Example #3
    /*ompi_request_test(&ompi_req,completed,MPI_STATUS_IGNORE); */
    *completed = ompi_req->req_complete;
    if (*completed) {
        ompi_request_free(&ompi_req);
        request->status = HCOLRTE_REQUEST_DONE;
    }

    return HCOLL_SUCCESS;
}

static int ec_handle_compare( rte_ec_handle_t handle_1 ,
                              rte_grp_handle_t
                              group_handle_1 ,
                              rte_ec_handle_t handle_2 ,
                              rte_grp_handle_t
                              group_handle_2 )
{
    return handle_1.handle == handle_2.handle;
}

static int get_ec_handles( int num_ec ,
                           int * ec_indexes ,
                           rte_grp_handle_t grp_h,
                           rte_ec_handle_t * ec_handles )
{
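    /* Translate communicator ranks into hcoll endpoint handles: each
     * handle pairs the rank with the peer's ompi_proc_t looked up from
     * the communicator. */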
    int i;
    ompi_communicator_t *comm = (ompi_communicator_t *)grp_h;
    for (i=0; i<num_ec; i++) {
        ompi_proc_t *proc = ompi_comm_peer_lookup(comm,ec_indexes[i]);
        ec_handles[i].rank = ec_indexes[i];
        ec_handles[i].handle = (void *)proc;
    }
    return HCOLL_SUCCESS;
}

#if 0 /* This callback is not used */
static int get_my_ec ( rte_grp_handle_t grp_h, rte_ec_handle_t *ec_handle)
{
    ompi_communicator_t *comm = (ompi_communicator_t *)grp_h;
    int my_rank = ompi_comm_rank(comm);
    ompi_proc_t *my_proc = ompi_comm_peer_lookup(comm,my_rank);
    ec_handle->handle = (void *)my_proc;
    ec_handle->rank = my_rank;
    return HCOLL_SUCCESS;
}
Example #4
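/* Non-blocking send.  For anything but synchronous mode, try the inline
 * fast path first; it completes immediately and can return the shared
 * empty request.  Otherwise allocate, initialize, and start a full send
 * request. */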
int mca_pml_ob1_isend(void *buf,
                      size_t count,
                      ompi_datatype_t * datatype,
                      int dst,
                      int tag,
                      mca_pml_base_send_mode_t sendmode,
                      ompi_communicator_t * comm,
                      ompi_request_t ** request)
{
    mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm;
    mca_pml_ob1_send_request_t *sendreq = NULL;
    ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst);
    mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
                                        dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
    int16_t seqn;
    int rc;

    seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1);

    if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
        rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc,
                                      endpoint, comm);
        if (OPAL_LIKELY(0 <= rc)) {
            /* NTH: it is legal to return ompi_request_empty since the only valid
             * field in a send completion status is whether or not the send was
             * cancelled (which it can't be at this point anyway). */
            *request = &ompi_request_empty;
            return OMPI_SUCCESS;
        }
    }

    MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq);
    if (NULL == sendreq)
        return OMPI_ERR_OUT_OF_RESOURCE;

    MCA_PML_OB1_SEND_REQUEST_INIT(sendreq,
                                  buf,
                                  count,
                                  datatype,
                                  dst, tag,
                                  comm, sendmode, false);

    PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
                             &(sendreq)->req_send.req_base,
                             PERUSE_SEND);

    MCA_PML_OB1_SEND_REQUEST_START_W_SEQ(sendreq, endpoint, seqn, rc);
    *request = (ompi_request_t *) sendreq;
    return rc;
}
Example #5
static int get_ec_handles( int num_ec ,
                           int * ec_indexes ,
                           rte_grp_handle_t grp_h,
                           rte_ec_handle_t * ec_handles )
{
    int i;
    ompi_communicator_t *comm = (ompi_communicator_t *)grp_h;
    for (i=0; i<num_ec; i++) {
        ompi_proc_t *proc = ompi_comm_peer_lookup(comm,ec_indexes[i]);
        ec_handles[i].rank = ec_indexes[i];
        ec_handles[i].handle = (void *)proc;
    }
    return HCOLL_SUCCESS;
}
Example #6
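/* Create a UCX endpoint for rank `dst`: verify the peer selected the ucx
 * PML, fetch its worker address, connect, and cache the endpoint in the
 * proc's endpoint array. */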
ucp_ep_h mca_pml_ucx_add_proc(ompi_communicator_t *comm, int dst)
{
    ucp_address_t *address;
    ucs_status_t status;
    size_t addrlen;
    ucp_ep_h ep;
    int ret;

    ompi_proc_t *proc0      = ompi_comm_peer_lookup(comm, 0);
    ompi_proc_t *proc_peer = ompi_comm_peer_lookup(comm, dst);

    /* Note: mca_pml_base_pml_check_selected() does not use its 3rd argument */
    if (OMPI_SUCCESS != (ret = mca_pml_base_pml_check_selected("ucx",
                                                              &proc0,
                                                              dst))) {
        return NULL;
    }

    ret = mca_pml_ucx_recv_worker_address(proc_peer, &address, &addrlen);
    if (ret < 0) {
        PML_UCX_ERROR("Failed to receive worker address from proc: %d", proc_peer->super.proc_name.vpid);
        return NULL;
    }

    PML_UCX_VERBOSE(2, "connecting to proc. %d", proc_peer->super.proc_name.vpid);
    status = ucp_ep_create(ompi_pml_ucx.ucp_worker, address, &ep);
    free(address);
    if (UCS_OK != status) {
        PML_UCX_ERROR("Failed to connect to proc: %d, %s", proc_peer->super.proc_name.vpid,
                                                           ucs_status_string(status));
        return NULL;
    }

    proc_peer->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = ep;

    return ep;
}
Example #7
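/* Non-blocking send over PSM: pack the data through the convertor, map a
 * synchronous-mode send onto PSM_MQ_FLAG_SENDSYNC, and post the buffer
 * with psm_mq_isend; completion is reported later through the PSM
 * request. */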
int
ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, 
                  struct ompi_communicator_t* comm,
                  int dest,
                  int tag,
                  struct ompi_convertor_t *convertor,
                  mca_pml_base_send_mode_t mode,
                  bool blocking,
                  mca_mtl_request_t * mtl_request)
{
    psm_error_t psm_error;
    uint64_t mqtag;
    uint32_t flags = 0;
    int ret;
    mca_mtl_psm_request_t * mtl_psm_request = (mca_mtl_psm_request_t*) mtl_request;
    size_t length;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_psm_endpoint_t* psm_endpoint = (mca_mtl_psm_endpoint_t*)ompi_proc->proc_pml;

    assert(mtl == &ompi_mtl_psm.super);

    mqtag = PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag);

    
    ret = ompi_mtl_datatype_pack(convertor, 
                                 &mtl_psm_request->buf,
                                 &length, 
                                 &mtl_psm_request->free_after);

    mtl_psm_request->length= length;
    mtl_psm_request->convertor = convertor;
    mtl_psm_request->type = OMPI_MTL_PSM_ISEND;

    if (OMPI_SUCCESS != ret) return ret;

    if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS)
        flags |= PSM_MQ_FLAG_SENDSYNC;

    psm_error = psm_mq_isend(ompi_mtl_psm.mq,
                             psm_endpoint->peer_addr,
                             flags,
                             mqtag,
                             mtl_psm_request->buf,
                             length,
                             mtl_psm_request,
                             &mtl_psm_request->psm_request);

    return psm_error == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR;
}
Example #8
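/* Blocking send over PSM2: psm_mq_send2 returns once the source buffer is
 * reusable, so the request can live on the stack and the packed buffer
 * can be freed before returning. */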
int
ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl,
                 struct ompi_communicator_t* comm,
                 int dest,
                 int tag,
                 struct opal_convertor_t *convertor,
                 mca_pml_base_send_mode_t mode)
{
    psm_error_t err;
    mca_mtl_psm2_request_t mtl_psm2_request;
    psm_mq_tag_t mqtag;
    uint32_t flags = 0;
    int ret;
    size_t length;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_psm2_endpoint_t* psm_endpoint = (mca_mtl_psm2_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];

    assert(mtl == &ompi_mtl_psm2.super);

    PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag);

    ret = ompi_mtl_datatype_pack(convertor,
                                 &mtl_psm2_request.buf,
                                 &length,
                                 &mtl_psm2_request.free_after);


    mtl_psm2_request.length = length;
    mtl_psm2_request.convertor = convertor;
    mtl_psm2_request.type = OMPI_mtl_psm2_ISEND;

    if (OMPI_SUCCESS != ret) return ret;

    if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS)
        flags |= PSM_MQ_FLAG_SENDSYNC;

    err = psm_mq_send2(ompi_mtl_psm2.mq,
                       psm_endpoint->peer_addr,
                       flags,
                       &mqtag,
                       mtl_psm2_request.buf,
                       length);

    if (mtl_psm2_request.free_after) {
        free(mtl_psm2_request.buf);
    }

    return err == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR;
}
Example #9
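/* Handle an incoming lock request.  Grant an exclusive lock only when the
 * window is unlocked, and a shared lock whenever no exclusive lock is
 * held; otherwise queue the request.  A granted lock is acknowledged with
 * a control message back to the origin. */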
int
ompi_osc_pt2pt_passive_lock(ompi_osc_pt2pt_module_t *module,
                            int32_t origin,
                            int32_t lock_type)
{
    bool send_ack = false;
    int ret = OMPI_SUCCESS;
    ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, origin );
    ompi_osc_pt2pt_pending_lock_t *new_pending;

    OPAL_THREAD_LOCK(&(module->p2p_lock));
    if (lock_type == MPI_LOCK_EXCLUSIVE) {
        if (module->p2p_lock_status == 0) {
            module->p2p_lock_status = MPI_LOCK_EXCLUSIVE;
            OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                                 "%d: setting lock status to EXCLUSIVE (from %d)",
                                 ompi_comm_rank(module->p2p_comm), origin));
            ompi_win_append_mode(module->p2p_win, OMPI_WIN_EXPOSE_EPOCH);
            send_ack = true;
        } else {
            OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                                 "%d: queuing lock request from %d (type=%d)", 
                                 ompi_comm_rank(module->p2p_comm), origin, lock_type));
            new_pending = OBJ_NEW(ompi_osc_pt2pt_pending_lock_t);
            new_pending->proc = proc;
            new_pending->lock_type = lock_type;
            opal_list_append(&(module->p2p_locks_pending), &(new_pending->super));
        }
    } else if (lock_type == MPI_LOCK_SHARED) {
        if (module->p2p_lock_status != MPI_LOCK_EXCLUSIVE) {
            module->p2p_lock_status = MPI_LOCK_SHARED;
            module->p2p_shared_count++;
            OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                                 "%d: setting lock status to SHARED (from %d), count %d",
                                 ompi_comm_rank(module->p2p_comm), origin, module->p2p_shared_count));
            ompi_win_append_mode(module->p2p_win, OMPI_WIN_EXPOSE_EPOCH);
            send_ack = true;
        } else {
            OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                                 "%d: queuing lock request from %d (type=%d)", 
                                 ompi_comm_rank(module->p2p_comm), origin, lock_type));
            new_pending = OBJ_NEW(ompi_osc_pt2pt_pending_lock_t);
            new_pending->proc = proc;
            new_pending->lock_type = lock_type;
            opal_list_append(&(module->p2p_locks_pending), &(new_pending->super));
        }
    } else {
        ret = OMPI_ERROR;
    }
    OPAL_THREAD_UNLOCK(&(module->p2p_lock));

    if (send_ack) {
        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                             "%d: sending lock ack to %d", 
                             ompi_comm_rank(module->p2p_comm), origin));
        ompi_osc_pt2pt_control_send(module, proc,
                                    OMPI_OSC_PT2PT_HDR_LOCK_REQ,
                                    ompi_comm_rank(module->p2p_comm),
                                    OMPI_SUCCESS);
    }

    return ret;
}
Example #10
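/* End a passive-target epoch: wait for the lock ack, tell the target how
 * many requests to expect, start every queued send request, then wait for
 * all outgoing operations (plus the target's final ack) to complete. */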
int
ompi_osc_pt2pt_module_unlock(int target,
                             ompi_win_t *win)
{
    int32_t out_count;
    opal_list_item_t *item;
    int ret;
    ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
    ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, target );

    OPAL_THREAD_LOCK(&module->p2p_lock);
    while (0 == module->p2p_lock_received_ack) {
        opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
    }

    module->p2p_lock_received_ack -= 1;

    /* start all the requests */
    ompi_osc_pt2pt_flip_sendreqs(module);

    /* try to start all the requests.  We've copied everything we need
       out of pending_sendreqs, so we don't need the lock here */
    out_count = opal_list_get_size(&(module->p2p_copy_pending_sendreqs));

    /* we want to send all the requests, plus we wait for one more
       completion event for the control message ack from the unlocker
       saying we're done */
    module->p2p_num_pending_out += (out_count + 1);
    OPAL_THREAD_UNLOCK(&module->p2p_lock);

    /* send the unlock request */
    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                         "%d: sending unlock request to %d with %d requests", 
                         ompi_comm_rank(module->p2p_comm), target,
                         out_count));
    ompi_osc_pt2pt_control_send(module, 
                                proc,
                                OMPI_OSC_PT2PT_HDR_UNLOCK_REQ,
                                ompi_comm_rank(module->p2p_comm),
                                out_count);

    while (NULL != 
           (item = opal_list_remove_first(&(module->p2p_copy_pending_sendreqs)))) {
        ompi_osc_pt2pt_sendreq_t *req = 
            (ompi_osc_pt2pt_sendreq_t*) item;

        ret = ompi_osc_pt2pt_sendreq_send(module, req);

        if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ) {
            opal_output_verbose(5, ompi_osc_base_output,
                                "complete: failure in starting sendreq (%d).  Will try later.",
                                ret);
            opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
        } else if (OMPI_SUCCESS != ret) {
            return ret;
        } 
    }

    /* wait for all the requests */
    OPAL_THREAD_LOCK(&module->p2p_lock);
    while (0 != module->p2p_num_pending_out) {
        opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
    }
    OPAL_THREAD_UNLOCK(&module->p2p_lock);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                         "%d: finished unlock to %d",
                         ompi_comm_rank(module->p2p_comm), target));

    /* set our mode on the window */
    ompi_win_remove_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_LOCK_ACCESS);

    return OMPI_SUCCESS;
}
Example #11
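/* Blocking send.  Buffered sends are turned into an isend whose request
 * is freed immediately.  Everything else tries the inline fast path
 * first, then falls back to a full send request and waits for it to
 * complete.  Without thread-multiple support a single cached request is
 * reused across calls. */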
int mca_pml_ob1_send(void *buf,
                     size_t count,
                     ompi_datatype_t * datatype,
                     int dst,
                     int tag,
                     mca_pml_base_send_mode_t sendmode,
                     ompi_communicator_t * comm)
{
    mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm;
    ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst);
    mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
                                        dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
    mca_pml_ob1_send_request_t *sendreq = NULL;
    int16_t seqn;
    int rc;

    if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == sendmode)) {
        /* large buffered sends *need* a real request so use isend instead */
        ompi_request_t *brequest;

        rc = mca_pml_ob1_isend (buf, count, datatype, dst, tag, sendmode, comm, &brequest);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            return rc;
        }

        /* free the request and return. don't care if it completes now */
        ompi_request_free (&brequest);
        return OMPI_SUCCESS;
    }

    if (OPAL_UNLIKELY(NULL == endpoint)) {
        return OMPI_ERR_UNREACH;
    }

    seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1);

    /**
     * The immediate send will not have a request, so it is
     * untraceable from the point of view of any debugger attached to
     * the parallel application.
     */
    if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
        rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc,
                                      endpoint, comm);
        if (OPAL_LIKELY(0 <= rc)) {
            return OMPI_SUCCESS;
        }
    }

#if !OMPI_ENABLE_THREAD_MULTIPLE
    sendreq = mca_pml_ob1_sendreq;
    if( OPAL_UNLIKELY(NULL == sendreq) )
#endif  /* !OMPI_ENABLE_THREAD_MULTIPLE */
        {
            MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq);
            if (NULL == sendreq)
                return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
#if !OMPI_ENABLE_THREAD_MULTIPLE
            mca_pml_ob1_sendreq = sendreq;
#endif  /* !OMPI_ENABLE_THREAD_MULTIPLE */
        }
    sendreq->req_send.req_base.req_proc = dst_proc;
    sendreq->rdma_frag = NULL;

    MCA_PML_OB1_SEND_REQUEST_INIT(sendreq,
                                  buf,
                                  count,
                                  datatype,
                                  dst, tag,
                                  comm, sendmode, false);

    PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
                             &sendreq->req_send.req_base,
                             PERUSE_SEND);

    MCA_PML_OB1_SEND_REQUEST_START_W_SEQ(sendreq, endpoint, seqn, rc);
    if (OPAL_LIKELY(rc == OMPI_SUCCESS)) {
        ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi);

        rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR;
    }

#if OMPI_ENABLE_THREAD_MULTIPLE
    MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq);
#else
    mca_pml_ob1_send_request_fini (sendreq);
#endif

    return rc;
}
Example #12
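/* MPI_Sendrecv_replace cannot receive directly into the buffer it is
 * sending from, so it receives the packed bytes into a temporary buffer
 * (on the stack for small messages, heap-allocated otherwise) and unpacks
 * them into the user buffer once the exchange completes. */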
int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
                         int dest, int sendtag, int source, int recvtag,
                         MPI_Comm comm, MPI_Status *status)

{
    int rc = MPI_SUCCESS;

    if ( MPI_PARAM_CHECK ) {
        rc = MPI_SUCCESS;
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        OMPI_CHECK_DATATYPE_FOR_RECV(rc, datatype, count);

        if (ompi_comm_invalid(comm)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME);
        } else if (dest != MPI_PROC_NULL && ompi_comm_peer_invalid(comm, dest)) {
            rc = MPI_ERR_RANK;
        } else if (sendtag < 0 || sendtag > mca_pml.pml_max_tag) {
            rc = MPI_ERR_TAG;
        } else if (source != MPI_PROC_NULL && source != MPI_ANY_SOURCE && ompi_comm_peer_invalid(comm, source)) {
            rc = MPI_ERR_RANK;
        } else if (((recvtag < 0) && (recvtag !=  MPI_ANY_TAG)) || (recvtag > mca_pml.pml_max_tag)) {
            rc = MPI_ERR_TAG;
        }
        OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME);
    }
 
    /* simple case */
    if ( source == MPI_PROC_NULL || dest == MPI_PROC_NULL || count == 0 ) {
        return MPI_Sendrecv(buf,count,datatype,dest,sendtag,buf,count,datatype,source,recvtag,comm,status);
    } else {

        ompi_convertor_t convertor;
        struct iovec iov;
        unsigned char recv_data[2048];
        size_t packed_size, max_data;
        uint32_t iov_count;
        ompi_status_public_t recv_status;
        ompi_proc_t* proc = ompi_comm_peer_lookup(comm,dest);
        if(proc == NULL) {
            rc = MPI_ERR_RANK;
            OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
        }

        /* initialize convertor to unpack recv buffer */
        OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
        ompi_convertor_copy_and_prepare_for_recv( proc->proc_convertor, datatype,
                                                  count, buf, 0, &convertor );

        /* setup a buffer for recv */
        ompi_convertor_get_packed_size( &convertor, &packed_size );
        if( packed_size > sizeof(recv_data) ) {
            rc = MPI_Alloc_mem(packed_size, MPI_INFO_NULL, &iov.iov_base);
            if(OMPI_SUCCESS != rc) {
                OMPI_ERRHANDLER_RETURN(OMPI_ERR_OUT_OF_RESOURCE, comm, MPI_ERR_BUFFER, FUNC_NAME);
            }
        } else {
            iov.iov_base = (caddr_t)recv_data;
        }

        /* recv into temporary buffer */
        rc = MPI_Sendrecv( buf, count, datatype, dest, sendtag, iov.iov_base, packed_size, 
                           MPI_BYTE, source, recvtag, comm, &recv_status );
        if (rc != MPI_SUCCESS) {
            if(packed_size > sizeof(recv_data))
                MPI_Free_mem(iov.iov_base);
            OBJ_DESTRUCT(&convertor);
            OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
        }

        /* unpack into users buffer */
        iov.iov_len = recv_status._count;
        iov_count = 1;
        max_data = recv_status._count;
        ompi_convertor_unpack(&convertor, &iov, &iov_count, &max_data );

        /* return status to user */
        if(status != MPI_STATUS_IGNORE) {
            *status = recv_status;
        }

        /* release resources */
        if(packed_size > sizeof(recv_data)) {
            MPI_Free_mem(iov.iov_base);
        }
        OBJ_DESTRUCT(&convertor);
        return MPI_SUCCESS;
    }
}
Example #13
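/* Receive side of accumulate.  If the data arrived with the header, apply
 * the operation directly under the accumulate lock; otherwise allocate a
 * buffer, post a long receive, and queue the request so the operation can
 * be applied from the completion callback. */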
int
ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
                                  ompi_osc_pt2pt_send_header_t *header,
                                  void *payload)
{
    int ret = OMPI_SUCCESS;
    struct ompi_op_t *op = ompi_osc_pt2pt_op_create(header->hdr_target_op);
    ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
    struct ompi_datatype_t *datatype = 
        ompi_osc_pt2pt_datatype_create(proc, &payload);

    if (header->hdr_msg_length > 0) {
        /* lock the window for accumulates */
        OPAL_THREAD_LOCK(&module->p2p_acc_lock);

        /* copy the data from the temporary buffer into the user window */
        ret = ompi_osc_pt2pt_process_op(module, header, datatype, op, payload, 
                                        header->hdr_msg_length);

        /* unlock the window for accumulates */
        OPAL_THREAD_UNLOCK(&module->p2p_acc_lock);

        /* Release datatype & op */
        OBJ_RELEASE(datatype);
        OBJ_RELEASE(op);

        OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), -1);

        opal_output_verbose(50, ompi_osc_base_output,
                            "%d received accum message from %d",
                            module->p2p_comm->c_my_rank,
                            header->hdr_origin);
        
    } else {
        ompi_osc_pt2pt_longreq_t *longreq;
        ptrdiff_t lb, extent, true_lb, true_extent;
        size_t buflen;

        /* figure out how big a buffer we need */
        ompi_ddt_get_extent(datatype, &lb, &extent);
        ompi_ddt_get_true_extent(datatype, &true_lb, &true_extent);
        buflen = true_extent + (header->hdr_target_count - 1) * extent;

        /* get a longreq and fill it in */
        ompi_osc_pt2pt_longreq_alloc(&longreq);

        longreq->req_comp_cb = ompi_osc_pt2pt_sendreq_recv_accum_long_cb;
        longreq->req_datatype = datatype;
        longreq->req_op = op;
        longreq->req_module = module;

        /* allocate a buffer to receive into ... */
        longreq->req_comp_cbdata = malloc(buflen + sizeof(ompi_osc_pt2pt_send_header_t));
        
        if (NULL == longreq->req_comp_cbdata) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        /* fill in tmp header */
        memcpy(longreq->req_comp_cbdata, header,
               sizeof(ompi_osc_pt2pt_send_header_t));
        ((ompi_osc_pt2pt_send_header_t*) longreq->req_comp_cbdata)->hdr_msg_length = buflen;

        ret = mca_pml.pml_irecv(((char*) longreq->req_comp_cbdata) + sizeof(ompi_osc_pt2pt_send_header_t),
                                header->hdr_target_count,
                                datatype,
                                header->hdr_origin,
                                header->hdr_origin_tag,
                                module->p2p_comm,
                                &(longreq->req_pml_req));

        opal_output_verbose(50, ompi_osc_base_output,
                            "%d started long recv accum message from %d (%d)",
                            module->p2p_comm->c_my_rank,
                            header->hdr_origin,
                            header->hdr_origin_tag);

        /* put the send request in the waiting list */
        OPAL_THREAD_LOCK(&(module->p2p_lock));
        opal_list_append(&(module->p2p_long_msgs), 
                         &(longreq->super.super));
        OPAL_THREAD_UNLOCK(&(module->p2p_lock));
    }

    return ret;
}
Example #14
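/* Post a Portals4 receive: resolve the source, build the match/ignore
 * bits, and append a use-once match entry.  For messages over the short
 * limit, spin until the entry is linked so that long unexpected messages
 * are guaranteed to make progress. */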
int
ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
                        struct ompi_communicator_t *comm,
                        int src,
                        int tag,
                        struct opal_convertor_t *convertor,
                        mca_mtl_request_t *mtl_request)
{
    ptl_match_bits_t match_bits, ignore_bits;
    int ret = OMPI_SUCCESS;
    ptl_process_t remote_proc;
    ompi_mtl_portals4_recv_request_t *ptl_request =
        (ompi_mtl_portals4_recv_request_t*) mtl_request;
    void *start;
    size_t length;
    bool free_after;
    ptl_me_t me;

    if  (MPI_ANY_SOURCE == src) {
        if (ompi_mtl_portals4.use_logical) {
            remote_proc.rank = PTL_RANK_ANY;
        } else {
            remote_proc.phys.nid = PTL_NID_ANY;
            remote_proc.phys.pid = PTL_PID_ANY;
        }
    } else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
        remote_proc.rank = src;
    } else {
        ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
        remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc));
    }

    MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid,
                               src, tag);

    ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        return ret;
    }

    ptl_request->super.type = portals4_req_recv;
    ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress;
#if OPAL_ENABLE_DEBUG
    ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
    ptl_request->hdr_data = 0;
#endif
    ptl_request->buffer_ptr = (free_after) ? start : NULL;
    ptl_request->convertor = convertor;
    ptl_request->delivery_ptr = start;
    ptl_request->delivery_len = length;
    ptl_request->req_started = false;
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
    ptl_request->pending_reply = 0;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n",
                         ptl_request->opcount,
                         remote_proc.phys.nid, remote_proc.phys.pid,
                         (int64_t)length, match_bits, ignore_bits, (unsigned long) ptl_request));

    me.start = start;
    me.length = length;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = ompi_mtl_portals4.uid;
    me.options =
        PTL_ME_OP_PUT |
        PTL_ME_USE_ONCE |
        PTL_ME_EVENT_UNLINK_DISABLE;
    if (length <= ompi_mtl_portals4.short_limit) {
        me.options |= PTL_ME_EVENT_LINK_DISABLE;
    }
    me.match_id = remote_proc;
    me.match_bits = match_bits;
    me.ignore_bits = ignore_bits;

    ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
                      ompi_mtl_portals4.recv_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      ptl_request,
                      &ptl_request->me_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d",
                            __FILE__, __LINE__, ret);
        return ompi_mtl_portals4_get_error(ret);
    }

    /* if a long message, spin until we either have a comm event or a
       link event, guaranteeing progress for long unexpected
       messages. */
    if (length > ompi_mtl_portals4.short_limit) {
        while (true != ptl_request->req_started) {
            ompi_mtl_portals4_progress();
        }
    }

    return OMPI_SUCCESS;
}
Example #15
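/* Compare-and-swap is always request based so the result has somewhere to
 * land: pack the datatype description plus the origin and compare values
 * into a fragment, pre-post the receive for the fetched value, and ship
 * the fragment to the target. */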
int ompi_osc_pt2pt_compare_and_swap (const void *origin_addr, const void *compare_addr,
                                    void *result_addr, struct ompi_datatype_t *dt,
                                    int target, OPAL_PTRDIFF_TYPE target_disp,
                                    struct ompi_win_t *win)
{
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, target);
    ompi_osc_pt2pt_frag_t *frag;
    ompi_osc_pt2pt_header_cswap_t *header;
    ompi_osc_pt2pt_sync_t *pt2pt_sync;
    size_t ddt_len, payload_len, frag_len;
    ompi_osc_pt2pt_request_t *request;
    const void *packed_ddt;
    int ret, tag;
    char *ptr;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "cswap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, %s",
                         (unsigned long) origin_addr, (unsigned long) compare_addr,
                         (unsigned long) result_addr, dt->name, target, (int) target_disp,
                         win->w_name));

    pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target, NULL);
    if (OPAL_UNLIKELY(NULL == pt2pt_sync)) {
        return OMPI_ERR_RMA_SYNC;
    }

    /* optimize self case. TODO: optimize local case */
    if (ompi_comm_rank (module->comm) == target) {
        return ompi_osc_pt2pt_cas_self (pt2pt_sync, origin_addr, compare_addr, result_addr, dt, target_disp,
                                        module);
    }

    /* compare-and-swaps are always request based, so that we know where to land the data */
    OMPI_OSC_PT2PT_REQUEST_ALLOC(win, request);

    request->type = OMPI_OSC_PT2PT_HDR_TYPE_CSWAP;
    request->origin_addr = origin_addr;
    request->internal = true;
    OMPI_DATATYPE_RETAIN(dt);
    request->origin_dt = dt;

    /* Compute datatype and payload lengths.  Note that the datatype description
     * must fit in a single frag. It should be small in this case. */
    ddt_len = ompi_datatype_pack_description_length(dt);

    /* we need to send both the origin and compare buffers */
    payload_len = dt->super.size * 2;

    ret = ompi_datatype_get_pack_description(dt, &packed_ddt);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    frag_len = sizeof(ompi_osc_pt2pt_header_cswap_t) + ddt_len + payload_len;
    ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, false);
    if (OMPI_SUCCESS != ret) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    tag = get_tag (module);
    ompi_osc_signal_outgoing (module, target, 1);

    header = (ompi_osc_pt2pt_header_cswap_t *) ptr;
    header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_CSWAP;
    header->base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
    header->len = frag_len;
    header->displacement = target_disp;
    header->tag = tag;
    osc_pt2pt_hton(header, proc);
    ptr += sizeof(ompi_osc_pt2pt_header_cswap_t);

    memcpy((unsigned char*) ptr, packed_ddt, ddt_len);
    ptr += ddt_len;

    /* pack the origin and compare data */
    osc_pt2pt_copy_for_send (ptr, dt->super.size, origin_addr, proc, 1, dt);
    ptr += dt->super.size;
    osc_pt2pt_copy_for_send (ptr, dt->super.size, compare_addr, proc, 1, dt);

    request->outstanding_requests = 1;
    ret = ompi_osc_pt2pt_irecv_w_cb (result_addr, 1, dt,
                                    target, tag_to_origin(tag), module->comm,
                                    NULL, ompi_osc_pt2pt_req_comm_complete, request);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        return ret;
    }

    return ompi_osc_pt2pt_frag_finish (module, frag);
}
Example #16
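/* Blocking send over MX: post an isend (or issend in synchronous mode),
 * then spin on mx_test until the request completes, aborting on any MX
 * error. */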
int
ompi_mtl_mx_send(struct mca_mtl_base_module_t* mtl, 
                 struct ompi_communicator_t* comm,
                 int dest,
                 int tag,
                 struct opal_convertor_t *convertor,
                 mca_pml_base_send_mode_t mode)
{
    mx_return_t mx_return;
    uint64_t match_bits;
    mca_mtl_mx_request_t mtl_mx_request;
    size_t length;
    mx_status_t mx_status;
    uint32_t result;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_mx_endpoint_t* mx_endpoint = (mca_mtl_mx_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
    char* where;

    assert(mtl == &ompi_mtl_mx.super);

    MX_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag); 
    
    ompi_mtl_datatype_pack(convertor, 
                           &mtl_mx_request.mx_segment[0].segment_ptr, 
                           &length,
                           &mtl_mx_request.free_after);

    mtl_mx_request.mx_segment[0].segment_length = length;
    mtl_mx_request.convertor = convertor;
    mtl_mx_request.type = OMPI_MTL_MX_ISEND;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "issend bits: 0x%016" PRIx64 "\n",
                         match_bits));

    if(mode == MCA_PML_BASE_SEND_SYNCHRONOUS) { 
        mx_return = mx_issend( ompi_mtl_mx.mx_endpoint, 
                               mtl_mx_request.mx_segment, 
                               1,
                               mx_endpoint->mx_peer_addr, 
                               match_bits, 
                               &mtl_mx_request, 
                               &mtl_mx_request.mx_request
                               );
        where = "mx_issend";
    } else { 
        mx_return = mx_isend( ompi_mtl_mx.mx_endpoint, 
                              mtl_mx_request.mx_segment,
                              1,
                              mx_endpoint->mx_peer_addr,
                              match_bits,
                              &mtl_mx_request,
                              &mtl_mx_request.mx_request
                              );
        where = "mx_isend";
    }
    if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) { 
        char peer_name[MX_MAX_HOSTNAME_LEN];
        if(MX_SUCCESS != mx_nic_id_to_hostname( mx_endpoint->mx_peer->nic_id, peer_name)) { 
            sprintf( peer_name, "unknown %lx nic_id", (long)mx_endpoint->mx_peer->nic_id ); 
        }
        opal_output(ompi_mtl_base_framework.framework_output, "Error in %s (error %s) sending to %s\n",
                    where, mx_strerror(mx_return), peer_name);

        /* Free buffer if needed */
        if(mtl_mx_request.free_after) { 
            free(mtl_mx_request.mx_segment[0].segment_ptr);
        }
        return OMPI_ERROR;
    }
    
    do { 
        mx_return = mx_test(ompi_mtl_mx.mx_endpoint, 
                            &mtl_mx_request.mx_request,
                            &mx_status,
                            &result);
        if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) { 
            opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_test (error %s)\n", mx_strerror(mx_return));
            abort();
        }
        if( OPAL_UNLIKELY(result && mx_status.code != MX_STATUS_SUCCESS) ) { 
            opal_output(ompi_mtl_base_framework.framework_output,
                        "Error in ompi_mtl_mx_send, mx_test returned something other than MX_STATUS_SUCCESS: mx_status.code = %d.\n",
                        mx_status.code);
            abort();
        }
    } while(!result);

    /* Free buffer if needed */
    if(mtl_mx_request.free_after) { 
        free(mtl_mx_request.mx_segment[0].segment_ptr);
    }
    
    return OMPI_SUCCESS;
}
Example #17
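/* Post a receive on generation-1 Portals.  Search the already-processed
 * unexpected queue and the unexpected event queue first; if the message
 * has not arrived, attach an ME/MD and use PtlMDUpdate to enable it
 * atomically, restarting the search if an unexpected message raced in. */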
int
ompi_mtl_portals_irecv(struct mca_mtl_base_module_t* mtl,
                       struct ompi_communicator_t *comm,
                       int src,
                       int tag,
                       struct ompi_convertor_t *convertor,
                       mca_mtl_request_t *mtl_request)
{
    ptl_match_bits_t match_bits, ignore_bits;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    ptl_handle_me_t me_h;
    int ret;
    ptl_process_id_t remote_proc;
    mca_mtl_base_endpoint_t *endpoint = NULL;
    ompi_mtl_portals_request_t *ptl_request = 
        (ompi_mtl_portals_request_t*) mtl_request;
    ompi_mtl_portals_event_t *recv_event = NULL;
    size_t buflen;

    ptl_request->convertor = convertor;

    if  (MPI_ANY_SOURCE == src) {
        remote_proc.nid = PTL_NID_ANY;
        remote_proc.pid = PTL_PID_ANY;
    } else {
        ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
        endpoint = (mca_mtl_base_endpoint_t*) ompi_proc->proc_pml;
        remote_proc = endpoint->ptl_proc;
    }

    PTL_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid,
                      src, tag);

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                         "recv bits: 0x%016llx 0x%016llx\n",
                         match_bits, ignore_bits));

    /* first, check the queue of processed unexpected messages */
    recv_event = ompi_mtl_portals_search_unex_q(match_bits, ignore_bits);
    if (NULL != recv_event) {
        /* found it */
        ompi_mtl_portals_get_data(recv_event, convertor, ptl_request);
        OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl,
                              (ompi_free_list_item_t*)recv_event);
        goto cleanup;
    } else {
restart_search:
        /* check unexpected events */
        recv_event = ompi_mtl_portals_search_unex_events(match_bits, ignore_bits);
        if (NULL != recv_event) {
            /* found it */
            ompi_mtl_portals_get_data(recv_event, convertor, ptl_request);
            OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl,
                                  (ompi_free_list_item_t*)recv_event);
            goto cleanup;
        }
    }

    /* didn't find it, now post the receive */
    ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
                                     &ptl_request->free_after);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }
    md.length = buflen;

    /* create ME entry */
    ret = PtlMEInsert(ompi_mtl_portals.ptl_match_ins_me_h,
                remote_proc,
                match_bits,
                ignore_bits,
                PTL_UNLINK,
                PTL_INS_BEFORE,
                &me_h);
    if (PTL_OK != ret) {
        return ompi_common_portals_error_ptl_to_ompi(ret);
    }

    /* associate a memory descriptor with the Match list Entry */
    md.threshold = 0;
    md.options = PTL_MD_OP_PUT | PTL_MD_TRUNCATE | PTL_MD_EVENT_START_DISABLE;
    md.user_ptr = ptl_request;
    md.eq_handle = ompi_mtl_portals.ptl_eq_h;
    ret = PtlMDAttach(me_h, md, PTL_UNLINK, &md_h);
    if (PTL_OK != ret) {
        return ompi_common_portals_error_ptl_to_ompi(ret);
    }

    /* now try to make active */
    md.threshold = 1;

    /* enable the memory descriptor, if the ptl_unexpected_recv_eq_h
     *   queue is empty */
    ret = PtlMDUpdate(md_h, NULL, &md,
                      ompi_mtl_portals.ptl_unexpected_recv_eq_h);
    if (ret == PTL_MD_NO_UPDATE) {
        /* a message has arrived since we searched - look again */
        PtlMDUnlink(md_h);
        if (ptl_request->free_after) { free(md.start); }
        goto restart_search;
    } else if( PTL_OK != ret ) {
        return ompi_common_portals_error_ptl_to_ompi(ret);
    }

    ptl_request->event_callback = ompi_mtl_portals_recv_progress;

 cleanup:

    return OMPI_SUCCESS;
}
Example #18
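/* Non-blocking send over MX: pack through the convertor, then post
 * mx_isend (or mx_issend in synchronous mode); completion is handled
 * later through the MTL request. */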
int
ompi_mtl_mx_isend(struct mca_mtl_base_module_t* mtl, 
                  struct ompi_communicator_t* comm,
                  int dest,
                  int tag,
                  struct opal_convertor_t *convertor,
                  mca_pml_base_send_mode_t mode,
                  bool blocking,
                  mca_mtl_request_t * mtl_request)
{
    mx_return_t mx_return;
    uint64_t match_bits;
    mca_mtl_mx_request_t * mtl_mx_request = (mca_mtl_mx_request_t*) mtl_request;
    size_t length;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_mx_endpoint_t* mx_endpoint = (mca_mtl_mx_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
    char* where;

    assert(mtl == &ompi_mtl_mx.super);

    MX_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag); 
    
    ompi_mtl_datatype_pack(convertor, 
                           &mtl_mx_request->mx_segment[0].segment_ptr, 
                           &length,
                           &mtl_mx_request->free_after);
    mtl_mx_request->mx_segment[0].segment_length = length;
    mtl_mx_request->convertor = convertor;
    mtl_mx_request->type = OMPI_MTL_MX_ISEND;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "issend bits: 0x%016" PRIx64 "\n", match_bits));

    if(mode == MCA_PML_BASE_SEND_SYNCHRONOUS) { 
        mx_return = mx_issend( ompi_mtl_mx.mx_endpoint, 
                               mtl_mx_request->mx_segment, 
                               1,
                               mx_endpoint->mx_peer_addr, 
                               match_bits, 
                               mtl_mx_request, 
                               &mtl_mx_request->mx_request
                               );
        where = "mx_issend";
    } else { 
        mx_return = mx_isend( ompi_mtl_mx.mx_endpoint, 
                              mtl_mx_request->mx_segment,
                              1,
                              mx_endpoint->mx_peer_addr,
                              match_bits,
                              mtl_mx_request,
                              &mtl_mx_request->mx_request
                              );
        where = "mx_isend";
    }
    if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) { 
        char peer_name[MX_MAX_HOSTNAME_LEN];
        if(MX_SUCCESS != mx_nic_id_to_hostname( mx_endpoint->mx_peer->nic_id, peer_name)) { 
            sprintf( peer_name, "unknown %lx nic_id", (long)mx_endpoint->mx_peer->nic_id ); 
        }
        opal_output(ompi_mtl_base_framework.framework_output, "Error in %s (error %s) sending to %s\n",
                    where, mx_strerror(mx_return), peer_name);
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
Example #19
static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs,
        int n_procs_in,
        struct ompi_communicator_t *comm,
        char *key,
        void *output_data
                                                                 )
{
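    /* Build the subgroup of on-node ranks that share this process's
     * socket: read the processor affinity mask, derive a socket index,
     * allgather the socket indexes across the on-node ranks, and collect
     * the ranks whose index matches ours. */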
    /* local variables */
    mca_sbgp_basesmsocket_module_t *module;
    /*
    opal_buffer_t* sbuffer = OBJ_NEW(opal_buffer_t);
    opal_buffer_t* rbuffer = OBJ_NEW(opal_buffer_t);
    */
    opal_paffinity_base_cpu_set_t my_cpu_set;
    bool bound;
    int ret;
    int num_processors;
    int socket_tmp;
    int my_socket_index;
    int core_index=-1;
    int proc, cnt, local, n_local_peers, my_index, my_rank;
    ompi_proc_t* my_proc;
    int *local_ranks_in_comm=NULL;
    int *socket_info=NULL, my_socket_info;
    int  i_cnt, lp_cnt, my_local_index = -1, comm_size=ompi_comm_size(comm);

    /* initialize data */
    output_data=NULL;
    my_rank=ompi_comm_rank(comm);
    my_proc=ompi_comm_peer_lookup(comm,my_rank);
    for( proc=0 ; proc < n_procs_in ; proc++) {
        if( procs[proc]==my_proc) {
            my_index=proc;
        }
    }

    /*create a new module*/
    module=OBJ_NEW(mca_sbgp_basesmsocket_module_t);
    if (!module ) {
        return NULL;
    }
    module->super.group_size=0;
    module->super.group_comm = comm;
    module->super.group_list = NULL;
    module->super.group_net = OMPI_SBGP_SOCKET;

    /*
        ** get my process affinity information
        ** */

    /* get the number of processors on this node */

    ret=opal_paffinity_base_get_processor_info(&num_processors);

    /* get process affinity mask */
    OPAL_PAFFINITY_CPU_ZERO(my_cpu_set);
    ret=opal_paffinity_base_get(&my_cpu_set);
    OPAL_PAFFINITY_PROCESS_IS_BOUND(my_cpu_set,&bound);

    /*debug process affinity*/
    /*
    {
        ret=opal_paffinity_base_get_socket_info(&num_socket);
        fprintf(stderr,"Number of sockets %d\n",num_socket);
        fprintf(stderr,"Test if rank %d is bound %d\n", my_rank, bound);
        fprintf(stderr,"return from opal_paffinity_base_get: %d\n\n",ret);
        fprintf(stderr,"bitmask elements: ");
        unsigned int long  jj;
        for(jj=0; jj < OPAL_PAFFINITY_BITMASK_NUM_ELEMENTS; jj++)
                 fprintf(stderr," %d ",my_cpu_set.bitmask[jj]);
        fprintf(stderr,"\n");
        fflush(stderr);
    }
    end debug process affinity*/

    if( !bound ) {

        /* process affinity not set, so socket index will be set to -1 */
        my_socket_index=-1;
        /*debug print*/
        /* */
        fprintf(stderr,"[%d]FAILED to set basesmsocket group !!!\n",my_rank);
        fflush(stderr);
        /*end debug*/
        goto NoLocalPeers;
    } else {

        my_socket_index=-1;
        /* loop over number of processors */
        for ( proc=0 ; proc < num_processors ; proc++ ) {
            if (OPAL_PAFFINITY_CPU_ISSET(proc,my_cpu_set)) {
                ret=opal_paffinity_base_get_map_to_socket_core(proc,&socket_tmp,&core_index);
                if( my_socket_index != socket_tmp ) {
                    my_socket_index=socket_tmp;
                    break;
                }
            }
        } /* end of proc loop */
    }

    /* Debug prints */
    /*
    {
    fprintf(stderr,"Number of processors per node: %d\n",num_processors);
    fprintf(stderr,"I am rank %d and my socket index is %d\n and my core index is %d\n",my_rank,my_socket_index,core_index);
    fprintf(stderr,"n_proc_in = %d\n",n_procs_in);
    fprintf(stderr,"\n");
    fflush(stderr);
    }
    end debug prints */


    /*get my socket index*/
    cnt=0;
    for( proc=0 ; proc < n_procs_in ; proc++) {
        local=OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if( local ) {
            cnt++;
        }
    }
    /*debug print */
    /*
    fprintf(stderr,"Number of local processors %d\n",cnt);
    end debug print*/

    /* if no other local procs found skip to end */
    if( 1 >= cnt ) {
        goto NoLocalPeers;
    }


#if 0
    int *local_ranks_in_comm;
    int32_t *socket_info, *my_socket_info;
    int  my_local_index;
#endif
    /* allocate structure to hold the list of local ranks */
    local_ranks_in_comm=(int *)malloc(sizeof(int)*cnt);
    if(NULL == local_ranks_in_comm ) {
        goto Error;
    }
    /* figure out which ranks from the input communicator - comm - will
     * participate in the local socket determination.
     */

    n_local_peers=0;
    i_cnt=0;
    for( proc = 0; proc < n_procs_in; proc++) {
        local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if ( local ) {

            /* set the rank within the on-host ranks - this will be used for the
             * allgather
             */
            if( my_proc == procs[proc] ) {
                my_local_index=n_local_peers;
            }
            /* find the rank of the current proc in comm.  We take advantage
             * of the fact that ranks in a group have the same relative
             * ordering as they do within the communicator.
             */
#if 1
            /*for( lp_cnt=i_cnt; lp_cnt < comm_size ; lp_cnt++ ) {*/
            for( lp_cnt=proc; lp_cnt < comm_size ; lp_cnt++ ) {
                if(procs[proc] == ompi_comm_peer_lookup(comm,lp_cnt) ) {
                    local_ranks_in_comm[i_cnt]=lp_cnt;
                    /* lp_cnt has already been checked */
                    i_cnt++;
                    /* found the corresponding rank in comm, so don't need
                     * to search any more */
                    break;
                }
                /*i_cnt++;*/
                /*fprintf(stderr,"QQQ i_cnt %d \n",i_cnt);*/
            }
#endif
            n_local_peers++;
        }
    }
    /*fprintf(stderr,"YYY n_local_peers %d\n",n_local_peers);*/
    socket_info=(int *)malloc(sizeof(int)*n_local_peers);
    /*fprintf(stderr,"XXX got socket info\n");*/
    if(NULL == socket_info ) {
        goto Error;
    }

    my_socket_info=my_socket_index;

    /* Allgather data over the communicator */
    ret=comm_allgather_pml(&my_socket_info, socket_info, 1,
                           MPI_INT, my_local_index, n_local_peers, local_ranks_in_comm,comm);
    if (OMPI_SUCCESS != ret ) {
        fprintf(stderr," comm_allgather_pml returned error %d \n", ret);
        fflush(stderr);
        return NULL;
    }


    /*allocate memory to the group_list probably an overestimation
      of the necessary resources */
    module->super.group_list=(int *)malloc(sizeof(int)*cnt);
    if(NULL == module->super.group_list) {
        goto Error;
    }

    /* figure out who is sharing the same socket */
    cnt=0;
    for (proc = 0; proc < n_local_peers; proc++) {
        int rem_rank=local_ranks_in_comm[proc];
        int rem_socket_index=socket_info[proc];

        /*Populate the list*/
        if (rem_socket_index == my_socket_index) {
            module->super.group_list[cnt]=rem_rank;
            cnt++;
        }
    }

    module->super.group_size=cnt;

    /*debug print*/
    /*
    {
        int ii;
        fprintf(stderr,"Ranks per socket: %d\n",cnt);
        fprintf(stderr,"Socket %d owns ranks: ", my_socket_index);
        for (ii=0; ii < cnt; ii++)
            fprintf(stderr,"%d ",module->super.group_list[ii]);
        fprintf(stderr,"\n");
        fflush(stderr);
    }

    {
        cpu_set_t set;
        unsigned int len = sizeof(set);
        int i;
        unsigned long mask = 0;
        CPU_ZERO(&set);
        if (sched_getaffinity(0, len, &set) < 0) {
            perror("sched_getaffinity");
            return -1;
        }
        for (i = 0; i < CPU_SETSIZE; i++) {
            int cpu = CPU_ISSET(i, &set);
            if (cpu) {
                mask |= 1<< i;
            }
        }
        opal_output(0,"%d: my affinity mask is: %08lx\n", my_local_index,mask);
    }


    end debug*/


    /*Free resources*/
    free(local_ranks_in_comm);
    free(socket_info);

    /*Return the module*/
    return (mca_sbgp_base_module_t *) module;


NoLocalPeers:
    /* nothing to store, so just free the module and return */
    /*fprintf(stderr,"No local socket peers\n");*/
    /*free(module);*/
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm) {
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;

Error:
    /*clean up*/
    if( NULL != module->super.group_list) {
        free(module->super.group_list);
        module->super.group_list=NULL;
    }
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm) {
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;


}
Example #20
static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs,
        int n_procs_in,
        struct ompi_communicator_t *comm,
        char *key,
        void *output_data
                                                                 )
{
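    /* Same subgrouping as above, but the socket index is derived from the
     * hwloc binding policy and logical socket ids rather than the legacy
     * paffinity interface. */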
    /* local variables */
    mca_sbgp_basesmsocket_module_t *module;
    int ret;
    int my_socket_index;
    int proc, cnt, local, n_local_peers, my_rank;
    ompi_proc_t* my_proc;
    int *local_ranks_in_comm=NULL;
    int *socket_info=NULL, my_socket_info;
    int  i_cnt, lp_cnt, my_local_index = -1, comm_size=ompi_comm_size(comm);

    /* initialize data */
    output_data=NULL;
    my_rank=ompi_comm_rank(comm);
    my_proc=ompi_comm_peer_lookup(comm,my_rank);

    /*create a new module*/
    module=OBJ_NEW(mca_sbgp_basesmsocket_module_t);
    if (!module ) {
        return NULL;
    }
    module->super.group_size=0;
    module->super.group_comm = comm;
    module->super.group_list = NULL;
    module->super.group_net = OMPI_SBGP_SOCKET;

    /* test to see if process is bound */
    if( OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) ) {

        /* process affinity not set, so socket index will be set to -1 */
        my_socket_index=-1;
        /*debug print*/
        /* */
        BASESMSOCKET_VERBOSE(10, ("[%d] FAILED to set basesmsocket group, processes are not bound!!!\n",my_rank));
        /*end debug*/
        goto NoLocalPeers;
    } else {

        my_socket_index=-1;
        /* this should find my logical socket id which is the socket id we want
         * physical socket ids are not necessarily unique, logical ones, as defined
         * by the hwloc API are unique.
         */
        if( OMPI_SUCCESS != mca_sbgp_map_to_logical_socket_id(&my_socket_index)) {
            BASESMSOCKET_VERBOSE(10, ("[%d] FAILED to set basesmsocket group !!!\n",my_rank));

            goto NoLocalPeers;
        }
    }

    /* Debug prints */
    /*
       {
       fprintf(stderr,"Number of processors per node: %d\n",num_processors);
       fprintf(stderr,"I am rank %d and my socket index is %d\n and my core index is %d\n",my_rank,my_socket_index,core_index);
       fprintf(stderr,"n_proc_in = %d\n",n_procs_in);
       fprintf(stderr,"\n");
       fflush(stderr);
       }
       end debug prints */


    /*get my socket index*/
    cnt=0;
    for( proc=0 ; proc < n_procs_in ; proc++) {
        local=OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if( local ) {
            cnt++;
        }
    }
    /*debug print */
    /*
    fprintf(stderr,"Number of local processors %d\n",cnt);
    end debug print*/

    /* if no other local procs found skip to end */
    if( 1 >= cnt ) {
        goto NoLocalPeers;
    }



    /* allocate structure to hold the list of local ranks */
    local_ranks_in_comm=(int *)malloc(sizeof(int)*cnt);
    if(NULL == local_ranks_in_comm ) {
        goto Error;
    }
    /* figure out which ranks from the input communicator - comm - will
     * participate in the local socket determination.
     */

    n_local_peers=0;
    i_cnt=0;
    for( proc = 0; proc < n_procs_in; proc++) {
        local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if ( local ) {

            /* set the rank within the on-host ranks - this will be used for the
             * allgather
             */
            if( my_proc == procs[proc] ) {
                my_local_index=n_local_peers;
            }
            /* find the rank of the current proc in comm.  We take advantage
             * of the fact that ranks in a group have the same relative
             * ordering as they do within the communicator.
             */
            for( lp_cnt=proc; lp_cnt < comm_size ; lp_cnt++ ) {
                if(procs[proc] == ompi_comm_peer_lookup(comm,lp_cnt) ) {
                    local_ranks_in_comm[i_cnt]=lp_cnt;
                    /* lp_cnt has already been checked */
                    i_cnt++;
                    /* found the corresponding rank in comm, so don't need
                     * to search any more */
                    break;
                }
                /*i_cnt++;*/
                /*fprintf(stderr,"QQQ i_cnt %d \n",i_cnt);*/
            }
            n_local_peers++;
        }
    }
    /*fprintf(stderr,"YYY n_local_peers %d\n",n_local_peers);*/
    socket_info=(int *)malloc(sizeof(int)*n_local_peers);
    /*fprintf(stderr,"XXX got socket info\n");*/
    if(NULL == socket_info ) {
        goto Error;
    }

    my_socket_info=my_socket_index;

    /* Allgather data over the communicator */
    ret=comm_allgather_pml(&my_socket_info, socket_info, 1,
                           MPI_INT, my_local_index, n_local_peers, local_ranks_in_comm,comm);
    if (OMPI_SUCCESS != ret ) {
        BASESMSOCKET_VERBOSE(10, ("comm_allgather_pml returned error %d\n",ret));
        /* jump to the cleanup path instead of returning directly, so the
         * module and the scratch buffers are not leaked */
        goto Error;
    }
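    /* socket_info[i] now holds the logical socket index of local rank
     * local_ranks_in_comm[i]; it is compared against my_socket_index below
     * to pick out the ranks sharing this socket */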


    /* allocate memory for the group_list - likely an overestimate of the
       necessary resources */
    module->super.group_list=(int *)malloc(sizeof(int)*cnt);
    if(NULL == module->super.group_list) {
        goto Error;
    }

    /* figure out who is sharing the same socket */
    cnt=0;
    for (proc = 0; proc < n_local_peers; proc++) {
        int rem_rank=local_ranks_in_comm[proc];
        int rem_socket_index=socket_info[proc];

        /*Populate the list*/
        if (rem_socket_index == my_socket_index) {
            module->super.group_list[cnt]=rem_rank;
            cnt++;
        }
    }

    module->super.group_size=cnt;

#if 0
    /*debug print*/

    {
        int ii;
        fprintf(stderr,"Ranks per socket: %d\n",cnt);
        fprintf(stderr,"Socket %d owns ranks: ", my_socket_index);
        for (ii=0; ii < cnt; ii++)
            fprintf(stderr,"%d ",module->super.group_list[ii]);
        fprintf(stderr,"\n");
        fflush(stderr);
    }
#endif



    /*Free resources*/
    free(local_ranks_in_comm);
    free(socket_info);

    /*Return the module*/
    return (mca_sbgp_base_module_t *) module;


NoLocalPeers:
    /* nothing to store, so just free the module and return */
    /*fprintf(stderr,"No local socket peers\n");*/
    /*free(module);*/
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm) {
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;

Error:
    /*clean up*/
    if( NULL != module->super.group_list) {
        free(module->super.group_list);
        module->super.group_list=NULL;
    }
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm) {
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;


}
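
The grouping logic above boils down to: gather every local peer's logical socket index with an allgather, then collect the ranks whose index matches mine. A minimal, standalone sketch of that pattern in plain MPI (hypothetical: it uses MPI_Allgather over the whole communicator in place of the internal comm_allgather_pml over the on-node subset, and a fake key stands in for the hwloc socket index):

/* group_by_key.c - compile: mpicc group_by_key.c -o group_by_key */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int my_key = rank % 2;   /* stand-in for a logical socket index */

    /* everyone learns everyone else's key */
    int *keys = malloc(size * sizeof(int));
    MPI_Allgather(&my_key, 1, MPI_INT, keys, 1, MPI_INT, MPI_COMM_WORLD);

    /* build the list of ranks sharing my key, as the sbgp module does */
    int cnt = 0;
    int *group_list = malloc(size * sizeof(int));
    for (int i = 0; i < size; i++) {
        if (keys[i] == my_key) {
            group_list[cnt++] = i;
        }
    }
    printf("rank %d shares key %d with %d rank(s)\n", rank, my_key, cnt);

    free(group_list);
    free(keys);
    MPI_Finalize();
    return 0;
}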
Example No. 21
0
static int
ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm,
        ompi_coll_portals4_request_t *request)
{
    int ret, line;

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:scatter_intra_linear_bottom enter rank %d", request->u.scatter.my_rank));

    ret = cleanup_scatter_handles(request);
    if (MPI_SUCCESS != ret) {
        line = __LINE__;
        goto err_hdlr;
    }

    ret = cleanup_sync_handles(request);
    if (MPI_SUCCESS != ret) {
        line = __LINE__;
        goto err_hdlr;
    }

    if (NULL != request->u.scatter.unpack_dst_buf) {
        uint32_t iov_count = 1;
        struct iovec iov;
        size_t max_data;

        ompi_coll_portals4_create_recv_converter (&request->u.scatter.recv_converter,
                request->u.scatter.unpack_dst_buf,
                ompi_comm_peer_lookup(comm, request->u.scatter.my_rank),
                request->u.scatter.unpack_dst_count,
                request->u.scatter.unpack_dst_dtype);
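        /* the recv converter describes the caller's destination buffer and
         * datatype; opal_convertor_unpack() below copies the packed bytes
         * out of scatter_buf into it */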

        iov.iov_len = request->u.scatter.packed_size;
        if (request->u.scatter.my_rank == request->u.scatter.root_rank) {
            /* unpack my data from the location in scatter_buf where it was packed */
            uint64_t offset = request->u.scatter.pack_src_extent * request->u.scatter.pack_src_count * request->u.scatter.my_rank;
            iov.iov_base = (IOVBASE_TYPE *)((char *)request->u.scatter.scatter_buf + offset);
        } else {
            iov.iov_base = (IOVBASE_TYPE *)request->u.scatter.scatter_buf;
        }
        opal_convertor_unpack(&request->u.scatter.recv_converter, &iov, &iov_count, &max_data);

        OBJ_DESTRUCT(&request->u.scatter.recv_converter);
    }

    if (request->u.scatter.free_after)
        free(request->u.scatter.scatter_buf);

    request->super.req_status.MPI_ERROR = OMPI_SUCCESS;

    OPAL_THREAD_LOCK(&ompi_request_lock);
    ompi_request_complete(&request->super, true);
    OPAL_THREAD_UNLOCK(&ompi_request_lock);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:scatter_intra_linear_bottom exit rank %d", request->u.scatter.my_rank));

    return OMPI_SUCCESS;

err_hdlr:
    request->super.req_status.MPI_ERROR = ret;

    if (request->u.scatter.free_after)
        free(request->u.scatter.scatter_buf);

    opal_output(ompi_coll_base_framework.framework_output,
                "%s:%4d:%4d\tError occurred ret=%d, rank %2d",
                __FILE__, __LINE__, line, ret, request->u.scatter.my_rank);

    return ret;
}
Example No. 22
0
int
ompi_osc_pt2pt_sendreq_recv_put(ompi_osc_pt2pt_module_t *module,
                                ompi_osc_pt2pt_send_header_t *header,
                                void *inbuf)
{
    int ret = OMPI_SUCCESS;
    void *target = (unsigned char*) module->p2p_win->w_baseptr + 
        (header->hdr_target_disp * module->p2p_win->w_disp_unit);    
    ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
    struct ompi_datatype_t *datatype = 
        ompi_osc_pt2pt_datatype_create(proc, &inbuf);
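    /* short messages arrive inline in inbuf and are unpacked immediately;
     * long messages are drained by posting a separate receive below */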

    if (header->hdr_msg_length > 0) {
        ompi_convertor_t convertor;
        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;

        /* create convertor */
        OBJ_CONSTRUCT(&convertor, ompi_convertor_t);

        /* initialize convertor using the origin proc looked up above */
        ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
                                                 datatype,
                                                 header->hdr_target_count,
                                                 target,
                                                 0,
                                                 &convertor);
        iov.iov_len = header->hdr_msg_length;
        iov.iov_base = (IOVBASE_TYPE*)inbuf;
        max_data = iov.iov_len;
        ompi_convertor_unpack(&convertor, 
                              &iov,
                              &iov_count,
                              &max_data );
        OBJ_DESTRUCT(&convertor);
        OBJ_RELEASE(datatype);
        OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), -1);
    
    } else {
        ompi_osc_pt2pt_longreq_t *longreq;
        ompi_osc_pt2pt_longreq_alloc(&longreq);

        longreq->req_comp_cb = ompi_osc_pt2pt_sendreq_recv_put_long_cb;
        longreq->req_comp_cbdata = NULL;
        longreq->req_datatype = datatype;
        longreq->req_module = module;

        ret = mca_pml.pml_irecv(target,
                                header->hdr_target_count,
                                datatype,
                                header->hdr_origin,
                                header->hdr_origin_tag,
                                module->p2p_comm,
                                &(longreq->req_pml_req));

        /* put the send request in the waiting list */
        OPAL_THREAD_LOCK(&(module->p2p_lock));
        opal_list_append(&(module->p2p_long_msgs),
                         &(longreq->super.super));
        OPAL_THREAD_UNLOCK(&(module->p2p_lock));
    }

    return ret;
}
Example No. 23
0
static int
ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count,
                                struct ompi_datatype_t *origin_dt,
                                int target, OPAL_PTRDIFF_TYPE target_disp,
                                int target_count,
                                struct ompi_datatype_t *target_dt,
                                struct ompi_op_t *op, ompi_win_t *win,
                                ompi_osc_pt2pt_request_t *request)
{
    int ret;
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, target);
    bool is_long_datatype = false;
    bool is_long_msg = false;
    ompi_osc_pt2pt_frag_t *frag;
    ompi_osc_pt2pt_header_acc_t *header;
    ompi_osc_pt2pt_sync_t *pt2pt_sync;
    size_t ddt_len, payload_len, frag_len;
    char *ptr;
    const void *packed_ddt;
    int tag = -1;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "acc: 0x%lx, %d, %s, %d, %d, %d, %s, %s, %s",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name, op->o_name,
                         win->w_name));

    pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target, NULL);
    if (OPAL_UNLIKELY(NULL == pt2pt_sync)) {
        return OMPI_ERR_RMA_SYNC;
    }

    /* short-circuit case */
    if (0 == origin_count || 0 == target_count) {
        if (request) {
            ompi_osc_pt2pt_request_complete (request, MPI_SUCCESS);
        }

        return OMPI_SUCCESS;
    }

    /* optimize the self case. TODO: optimize the local case */
    if (ompi_comm_rank (module->comm) == target) {
        return ompi_osc_pt2pt_acc_self (pt2pt_sync, origin_addr, origin_count, origin_dt,
                                        target_disp, target_count, target_dt,
                                        op, module, request);
    }

    /* Compute datatype and payload lengths.  Note that the datatype description
     * must fit in a single frag */
    ddt_len = ompi_datatype_pack_description_length(target_dt);
    payload_len = origin_dt->super.size * origin_count;

    frag_len = sizeof(*header) + ddt_len + payload_len;
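    /* try for a single eager fragment carrying header + packed datatype
     * description + payload; if that does not fit, fall back to sending the
     * payload separately (long message), and if even the description does not
     * fit, ship the datatype itself separately too (large datatype) */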
    ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, true);
    if (OMPI_SUCCESS != ret) {
        frag_len = sizeof(*header) + ddt_len;
        ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, !request);
        if (OMPI_SUCCESS != ret) {
            /* allocate space for the header plus space to store ddt_len */
            frag_len = sizeof(*header) + 8;
            ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, !request);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }

            is_long_datatype = true;
        }

        is_long_msg = true;
        tag = get_tag (module);
    } else {
        /* still need to set the tag for the active/passive logic on the target */
        tag = !!(module->passive_target_access_epoch);
    }

    if (is_long_msg) {
        /* wait for synchronization before posting a long message */
        if (pt2pt_sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK) {
            OPAL_THREAD_LOCK(&pt2pt_sync->lock);
            ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
            while (!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER)) {
                opal_condition_wait(&pt2pt_sync->cond, &pt2pt_sync->lock);
            }
            OPAL_THREAD_UNLOCK(&pt2pt_sync->lock);
        } else {
            ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
        }
    }

    header = (ompi_osc_pt2pt_header_acc_t*) ptr;
    header->base.flags = 0;
    header->len = frag_len;
    header->count = target_count;
    header->displacement = target_disp;
    header->op = op->o_f_to_c_index;
    header->tag = tag;
    ptr += sizeof (*header);

    do {
        ret = ompi_datatype_get_pack_description(target_dt, &packed_ddt);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            break;
        }

        if (is_long_datatype) {
            /* the datatype does not fit in an eager message. send it separately */
            header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE;

            OMPI_DATATYPE_RETAIN(target_dt);

            ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE,
                                            target, tag_to_target(tag), module->comm,
                                            ompi_osc_pt2pt_dt_send_complete, target_dt);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                break;
            }

            *((uint64_t *) ptr) = ddt_len;
            ptr += 8;
        } else {
            memcpy((unsigned char*) ptr, packed_ddt, ddt_len);
            ptr += ddt_len;
        }

        if (!is_long_msg) {
            header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC;
            osc_pt2pt_hton(header, proc);

            osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc,
                                    origin_count, origin_dt);

            /* the user's buffer is no longer needed so mark the request as
             * complete. */
            if (request) {
                ompi_osc_pt2pt_request_complete (request, MPI_SUCCESS);
            }
        } else {
            header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG;
            osc_pt2pt_hton(header, proc);

            OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                                 "acc: starting long accumulate with tag %d", tag));

            ret = ompi_osc_pt2pt_data_isend (module, origin_addr, origin_count, origin_dt,
                                            target, tag_to_target(tag), request);
        }
    } while (0);

    if (OMPI_SUCCESS != ret) {
        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                             "acc: failed with eror %d", ret));
    } else {
        /* mark the fragment as valid */
        header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_VALID;
    }

    return ompi_osc_pt2pt_frag_finish(module, frag);
}
Example No. 24
0
int bcol_basesmuma_smcm_allgather_connection(
                                             mca_bcol_basesmuma_module_t *sm_bcol_module,
                                             mca_sbgp_base_module_t *module,
                                             opal_list_t *peer_list,
                                             bcol_basesmuma_smcm_proc_item_t ***back_files,
                                             ompi_communicator_t *comm,
                                             bcol_basesmuma_smcm_file_t input,
                                             char *base_fname,
                                             bool map_all)
{

    /* define local variables */

    int rc, i, fd;
    ptrdiff_t mem_offset;
    ompi_proc_t *proc_temp, *my_id;
    bcol_basesmuma_smcm_proc_item_t *temp;
    bcol_basesmuma_smcm_proc_item_t *item_ptr;
    bcol_basesmuma_smcm_proc_item_t **backing_files;
    struct file_info_t local_file;
    struct file_info_t *all_files=NULL;

    /* sanity check */
    if (strlen(input.file_name) > SM_BACKING_FILE_NAME_MAX_LEN-1) {
        opal_output (ompi_bcol_base_framework.framework_output, "backing file name too long:  %s len :: %d",
                     input.file_name, (int) strlen(input.file_name));
        return OMPI_ERR_BAD_PARAM;
    }

    backing_files = (bcol_basesmuma_smcm_proc_item_t **)
        calloc(module->group_size, sizeof(bcol_basesmuma_smcm_proc_item_t *));
    if (!backing_files) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* FIXME *back_files might have been already allocated
     * so free it in order to avoid a memory leak */
    if (NULL != *back_files) {
        free (*back_files);
    }
    *back_files = backing_files;

    my_id = ompi_proc_local();

    /* Phase One:
       gather a list of processes that will participate in the allgather - I'm
       preparing this list from the sbgp-ing module that was passed into the function */

    /* fill in local file information */
    local_file.vpid  = ((orte_process_name_t*)&my_id->super.proc_name)->vpid;
    local_file.jobid = ((orte_process_name_t*)&my_id->super.proc_name)->jobid;
    local_file.file_size=input.size;
    local_file.size_ctl_structure=input.size_ctl_structure;
    local_file.data_seg_alignment=input.data_seg_alignment;

    strcpy (local_file.file_name, input.file_name);

    /* will exchange this data type as a string of characters -
     * this routine is first called before MPI_Init() completes
     * and before error handling is set up, so we can't use the
     * MPI datatypes to send this data */
    all_files = (struct file_info_t *) calloc(module->group_size,
                                              sizeof (struct file_info_t));
    if (!all_files) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* exchange data */
    rc = comm_allgather_pml(&local_file,all_files,sizeof(struct file_info_t), MPI_CHAR,
                            sm_bcol_module->super.sbgp_partner_module->my_index,
                            sm_bcol_module->super.sbgp_partner_module->group_size,
                            sm_bcol_module->super.sbgp_partner_module->group_list,
                            sm_bcol_module->super.sbgp_partner_module->group_comm);
    if( OMPI_SUCCESS != rc ) {
        opal_output (ompi_bcol_base_framework.framework_output, "failed in comm_allgather_pml.  Error code: %d", rc);
        goto Error;
    }

    /* Phase Four:
       loop through the receive buffer, unpacking the data received from remote peers */

    for (i = 0; i < module->group_size; i++) {
        struct file_info_t *rem_file = all_files + i;

        /* check if this is my index or if the file is already mapped (set above). there
         * is no reason to look through the peer list again because no two members of
         * the group will have the same vpid/jobid pair. ignore this previously found
         * mapping if map_all was requested (NTH: not sure why exactly, since we then
         * re-map an already mapped file) */
        if (sm_bcol_module->super.sbgp_partner_module->my_index == i) {
            continue;
        }

        proc_temp = ompi_comm_peer_lookup(comm,module->group_list[i]);

        OPAL_LIST_FOREACH(item_ptr, peer_list, bcol_basesmuma_smcm_proc_item_t) {
            /* if the vpid/jobid/filename combination already exists in the list,
               then do not map this peer's file --- because you already have */
            if (0 == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL,
                                                  OMPI_CAST_RTE_NAME(&proc_temp->super.proc_name),
                                                  &item_ptr->peer) &&
                0 == strcmp (item_ptr->sm_file.file_name, rem_file->file_name)) {
                ++item_ptr->refcnt;
                /* record file data */
                backing_files[i] = item_ptr;
                break;
            }
        }

        if (!map_all && backing_files[i]) {
            continue;
        }

        temp = OBJ_NEW(bcol_basesmuma_smcm_proc_item_t);
        if (!temp) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            goto Error;
        }

        temp->peer.vpid = rem_file->vpid;
        temp->peer.jobid = rem_file->jobid;

        temp->sm_file.file_name = strdup (rem_file->file_name);
        if (!temp->sm_file.file_name) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            OBJ_RELEASE(temp);
            goto Error;
        }

        temp->sm_file.size = (size_t) rem_file->file_size;
        temp->sm_file.mpool_size = (size_t) rem_file->file_size;
        temp->sm_file.size_ctl_structure = (size_t) rem_file->size_ctl_structure;
        temp->sm_file.data_seg_alignment = (size_t) rem_file->data_seg_alignment;
        temp->refcnt = 1;

        /* Phase Five:
           If map_all == true, then we map every peer's file;
           otherwise we check whether I have already mapped this
           vpid/jobid/filename combination, and if I have, I do
           not mmap this peer's file. */
        fd = open(temp->sm_file.file_name, O_RDWR, 0600);
        if (0 > fd) {
            opal_output (ompi_bcol_base_framework.framework_output, "SMCM Allgather failed to open sm backing file %s. errno = %d",
                         temp->sm_file.file_name, errno);
            rc = OMPI_ERROR;
            goto Error;
        }

        /* map the file */
        temp->sm_mmap = bcol_basesmuma_smcm_reg_mmap (NULL, fd, temp->sm_file.size,
                                                      temp->sm_file.size_ctl_structure,
                                                      temp->sm_file.data_seg_alignment,
                                                      temp->sm_file.file_name);
        close (fd);
        if (NULL == temp->sm_mmap) {
            opal_output (ompi_bcol_base_framework.framework_output, "mmapping failed to map remote peer's file");
            OBJ_RELEASE(temp);
            rc = OMPI_ERROR;
            goto Error;
        }

        /* compute memory offset */
        mem_offset = (ptrdiff_t) temp->sm_mmap->data_addr -
            (ptrdiff_t) temp->sm_mmap->map_seg;
        temp->sm_mmap->map_seg->seg_offset = mem_offset;
        temp->sm_mmap->map_seg->seg_size = temp->sm_file.size - mem_offset;
        /* more stuff to follow */

        /* append this peer's info, including shared memory map addr, onto the
           peer_list */

        /* record file data */
        backing_files[i] = (bcol_basesmuma_smcm_proc_item_t *) temp;

        opal_list_append(peer_list, (opal_list_item_t*) temp);
    }

    rc = OMPI_SUCCESS;

 Error:

    /* error clean-up and return */
    if (NULL != all_files) {
        free(all_files);
    }

    return rc;
}
Example No. 25
0
static inline
int ompi_osc_pt2pt_rget_accumulate_internal (const void *origin_addr, int origin_count,
                                            struct ompi_datatype_t *origin_datatype,
                                            void *result_addr, int result_count,
                                            struct ompi_datatype_t *result_datatype,
                                            int target_rank, MPI_Aint target_disp,
                                            int target_count, struct ompi_datatype_t *target_datatype,
                                            struct ompi_op_t *op, struct ompi_win_t *win,
                                            bool release_req, struct ompi_request_t **request)
{
    int ret;
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, target_rank);
    bool is_long_datatype = false;
    bool is_long_msg = false;
    ompi_osc_pt2pt_frag_t *frag;
    ompi_osc_pt2pt_header_acc_t *header;
    ompi_osc_pt2pt_sync_t *pt2pt_sync;
    size_t ddt_len, payload_len, frag_len;
    char *ptr;
    const void *packed_ddt;
    int tag;
    ompi_osc_pt2pt_request_t *pt2pt_request;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget_acc: 0x%lx, %d, %s, 0x%lx, %d, %s, 0x%x, %d, %d, %s, %s, %s",
                         (unsigned long) origin_addr, origin_count, origin_datatype->name,
                         (unsigned long) result_addr, result_count, result_datatype->name,
                         target_rank, (int) target_disp, target_count, target_datatype->name,
                         op->o_name, win->w_name));

    pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target_rank, NULL);
    if (OPAL_UNLIKELY(NULL == pt2pt_sync)) {
        return OMPI_ERR_RMA_SYNC;
    }

    /* get_accumulates are always request based, so that we know where to land the data */
    OMPI_OSC_PT2PT_REQUEST_ALLOC(win, pt2pt_request);

    pt2pt_request->internal = release_req;

    /* short-circuit case. note that origin_count may be 0 if op is MPI_NO_OP */
    if (0 == result_count || 0 == target_count) {
        ompi_osc_pt2pt_request_complete (pt2pt_request, MPI_SUCCESS);
        *request = &pt2pt_request->super;
        return OMPI_SUCCESS;
    }

    if (!release_req) {
        /* wait for epoch to begin before starting operation */
        ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
    }

    /* optimize the self case. TODO: optimize the local case */
    if (ompi_comm_rank (module->comm) == target_rank) {
        *request = &pt2pt_request->super;
        return ompi_osc_pt2pt_gacc_self (pt2pt_sync, origin_addr, origin_count, origin_datatype,
                                         result_addr, result_count, result_datatype,
                                         target_disp, target_count, target_datatype,
                                         op, module, pt2pt_request);
    }

    pt2pt_request->type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC;
    pt2pt_request->origin_addr = origin_addr;
    pt2pt_request->origin_count = origin_count;
    OMPI_DATATYPE_RETAIN(origin_datatype);
    pt2pt_request->origin_dt = origin_datatype;

    /* Compute datatype and payload lengths.  Note that the datatype description
     * must fit in a single frag */
    ddt_len = ompi_datatype_pack_description_length(target_datatype);

    if (&ompi_mpi_op_no_op.op != op) {
        payload_len = origin_datatype->super.size * origin_count;
    } else {
        payload_len = 0;
    }

    frag_len = sizeof(*header) + ddt_len + payload_len;
    ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr, false, release_req);
    if (OMPI_SUCCESS != ret) {
        frag_len = sizeof(*header) + ddt_len;
        ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr, true, release_req);
        if (OMPI_SUCCESS != ret) {
            /* allocate space for the header plus space to store ddt_len */
            frag_len = sizeof(*header) + 8;
            ret = ompi_osc_pt2pt_frag_alloc(module, target_rank, frag_len, &frag, &ptr, true, release_req);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }

            is_long_datatype = true;
        }

        is_long_msg = true;
    }

    tag = get_tag (module);
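    /* one tag, two directions: tag_to_target() marks traffic sent to the
     * target, tag_to_origin() marks the reply carrying the fetched data
     * back to the origin */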

    /* If this is a long message then we need two completions before the
     * request is complete (1 for the send, 1 for the receive) */
    pt2pt_request->outstanding_requests = 1 + is_long_msg;

    /* increment the number of outgoing fragments */
    ompi_osc_signal_outgoing (module, target_rank, pt2pt_request->outstanding_requests);

    header = (ompi_osc_pt2pt_header_acc_t *) ptr;
    header->base.flags = 0;
    header->len = frag_len;
    header->count = target_count;
    header->displacement = target_disp;
    header->op = op->o_f_to_c_index;
    header->tag = tag;

    ptr = (char *)(header + 1);

    do {
        ret = ompi_datatype_get_pack_description(target_datatype, &packed_ddt);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            break;
        }

        if (is_long_datatype) {
            /* the datatype does not fit in an eager message. send it separately */
            header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE;

            OMPI_DATATYPE_RETAIN(target_datatype);

            ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE,
                                            target_rank, tag_to_target(tag), module->comm,
                                            ompi_osc_pt2pt_dt_send_complete, target_datatype);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                break;
            }

            *((uint64_t *) ptr) = ddt_len;
            ptr += 8;
        } else {
            memcpy((unsigned char*) ptr, packed_ddt, ddt_len);
            ptr += ddt_len;
        }

        ret = ompi_osc_pt2pt_irecv_w_cb (result_addr, result_count, result_datatype,
                                        target_rank, tag_to_origin(tag), module->comm,
                                        NULL, ompi_osc_pt2pt_req_comm_complete, pt2pt_request);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            break;
        }

        if (!is_long_msg) {
            header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC;
            osc_pt2pt_hton(header, proc);

            if (&ompi_mpi_op_no_op.op != op) {
                osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc, origin_count,
                                        origin_datatype);
            }
        } else {
            header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG;
            osc_pt2pt_hton(header, proc);

            ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_datatype,
                                            target_rank, tag_to_target(tag), module->comm,
                                            ompi_osc_pt2pt_req_comm_complete, pt2pt_request);
        }
    } while (0);

    if (OMPI_SUCCESS == ret) {
        header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_VALID;
        *request = (ompi_request_t *) pt2pt_request;
    }

    return ompi_osc_pt2pt_frag_finish(module, frag);
}
Example No. 26
0
static int
ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count,
                                struct ompi_datatype_t *origin_dt,
                                int target, OPAL_PTRDIFF_TYPE target_disp,
                                int target_count,
                                struct ompi_datatype_t *target_dt,
                                struct ompi_op_t *op, ompi_win_t *win,
                                ompi_osc_pt2pt_request_t *request)
{
    int ret;
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, target);
    bool is_long_datatype = false;
    bool is_long_msg = false;
    ompi_osc_pt2pt_frag_t *frag;
    ompi_osc_pt2pt_header_acc_t *header;
    ompi_osc_pt2pt_sync_t *pt2pt_sync;
    size_t ddt_len, payload_len, frag_len;
    char *ptr;
    const void *packed_ddt;
    int tag = -1;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "acc: 0x%lx, %d, %s, %d, %d, %d, %s, %s, %s",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name, op->o_name,
                         win->w_name));

    pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target, NULL);
    if (OPAL_UNLIKELY(NULL == pt2pt_sync)) {
        return OMPI_ERR_RMA_SYNC;
    }

    /* short-circuit case */
    if (0 == origin_count || 0 == target_count) {
        if (request) {
            ompi_osc_pt2pt_request_complete (request, MPI_SUCCESS);
        }

        return OMPI_SUCCESS;
    }

    /* optimize the self case. TODO: optimize the local case */
    if (ompi_comm_rank (module->comm) == target) {
        return ompi_osc_pt2pt_acc_self (pt2pt_sync, origin_addr, origin_count, origin_dt,
                                        target_disp, target_count, target_dt,
                                        op, module, request);
    }

    /* Compute datatype and payload lengths.  Note that the datatype description
     * must fit in a single frag */
    ddt_len = ompi_datatype_pack_description_length(target_dt);
    payload_len = origin_dt->super.size * origin_count;

    frag_len = sizeof(*header) + ddt_len + payload_len;
    ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr);
    if (OMPI_SUCCESS != ret) {
        frag_len = sizeof(*header) + ddt_len;
        ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr);
        if (OMPI_SUCCESS != ret) {
            /* allocate space for the header plus space to store ddt_len */
            frag_len = sizeof(*header) + 8;
            ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }

            is_long_datatype = true;
        }

        is_long_msg = true;
        tag = get_rtag (module);
    }

    /* flush will be called at the end of this function. make sure all post messages have
     * arrived. */
    if ((is_long_msg || request) && OMPI_OSC_PT2PT_SYNC_TYPE_PSCW == pt2pt_sync->type) {
        ompi_osc_pt2pt_sync_wait (pt2pt_sync);
    }

    header = (ompi_osc_pt2pt_header_acc_t*) ptr;
    header->base.flags = 0;
    header->len = frag_len;
    header->count = target_count;
    header->displacement = target_disp;
    header->op = op->o_f_to_c_index;
    ptr += sizeof (*header);

    do {
        ret = ompi_datatype_get_pack_description(target_dt, &packed_ddt);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            break;
        }

        if (is_long_datatype) {
            /* the datatype does not fit in an eager message. send it separately */
            header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE;

            OBJ_RETAIN(target_dt);

            ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target,
                                            tag, module->comm, ompi_osc_pt2pt_dt_send_complete,
                                            target_dt);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                break;
            }

            *((uint64_t *) ptr) = ddt_len;
            ptr += 8;
        } else {
            memcpy((unsigned char*) ptr, packed_ddt, ddt_len);
            ptr += ddt_len;
        }

        if (!is_long_msg) {
            header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC;
            osc_pt2pt_hton(header, proc);

            osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc,
                                    origin_count, origin_dt);

            /* the user's buffer is no longer needed so mark the request as
             * complete. */
            if (request) {
                ompi_osc_pt2pt_request_complete (request, MPI_SUCCESS);
            }
        } else {
            header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG;
            header->tag = tag;
            osc_pt2pt_hton(header, proc);

            OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                                 "acc: starting long accumulate with tag %d", tag));

            /* increment the outgoing send count */
            ompi_osc_signal_outgoing (module, target, 1);

            if (request) {
                request->outstanding_requests = 1;
                ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_dt,
                                                target, tag, module->comm, ompi_osc_pt2pt_req_comm_complete,
                                                request);
            } else {
                ret = ompi_osc_pt2pt_component_isend (module, origin_addr, origin_count, origin_dt, target, tag,
                                                     module->comm);
            }
        }
    } while (0);

    if (OMPI_SUCCESS != ret) {
        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                             "acc: failed with eror %d", ret));
    } else {
        /* mark the fragment as valid */
        header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_VALID;
    }

    ret = ompi_osc_pt2pt_frag_finish(module, frag);

    if (is_long_msg || request) {
        /* need to flush now in case the caller decides to wait on the request */
        ompi_osc_pt2pt_frag_flush_target (module, target);
    }

    return ret;
}
Example No. 27
0
static int
setup_scatter_buffers_linear(struct ompi_communicator_t   *comm,
                             ompi_coll_portals4_request_t *request,
                             mca_coll_portals4_module_t   *portals4_module)
{
    int ret, line;

    int8_t i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank);

    ompi_coll_portals4_create_send_converter (&request->u.scatter.send_converter,
            request->u.scatter.pack_src_buf,
            ompi_comm_peer_lookup(comm, request->u.scatter.my_rank),
            request->u.scatter.pack_src_count,
            request->u.scatter.pack_src_dtype);
    opal_convertor_get_packed_size(&request->u.scatter.send_converter, &request->u.scatter.packed_size);
    OBJ_DESTRUCT(&request->u.scatter.send_converter);

    /**********************************/
    /* Setup Scatter Buffers          */
    /**********************************/
    if (i_am_root) {

        /*
         * calculate the total size of the packed data
         */
        request->u.scatter.scatter_bytes=request->u.scatter.packed_size * (ptrdiff_t)request->u.scatter.size;

        /* all transfers done using request->u.scatter.sdtype.
         * allocate temp buffer for recv, copy and/or rotate data at the end */
        request->u.scatter.scatter_buf = (char *) malloc(request->u.scatter.scatter_bytes);
        if (NULL == request->u.scatter.scatter_buf) {
            ret = OMPI_ERR_OUT_OF_RESOURCE;
            line = __LINE__;
            goto err_hdlr;
        }
        request->u.scatter.free_after = 1;

        for (int32_t i=0; i<request->u.scatter.size; i++) {
            uint32_t iov_count = 1;
            struct iovec iov;
            size_t max_data;

            uint64_t offset = request->u.scatter.pack_src_extent * request->u.scatter.pack_src_count * i;

            opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                                "%s:%d:rank(%d): offset(%lu)",
                                __FILE__, __LINE__, request->u.scatter.my_rank,
                                offset);

            ompi_coll_portals4_create_send_converter (&request->u.scatter.send_converter,
                    request->u.scatter.pack_src_buf + offset,
                    ompi_comm_peer_lookup(comm, request->u.scatter.my_rank),
                    request->u.scatter.pack_src_count,
                    request->u.scatter.pack_src_dtype);

            iov.iov_len = request->u.scatter.packed_size;
            iov.iov_base = (IOVBASE_TYPE *) ((char *)request->u.scatter.scatter_buf + (request->u.scatter.packed_size*i));
            opal_convertor_pack(&request->u.scatter.send_converter, &iov, &iov_count, &max_data);

            OBJ_DESTRUCT(&request->u.scatter.send_converter);
        }

        opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                            "%s:%d:rank(%d): root - scatter_buf(%p) - scatter_bytes(%lu)=packed_size(%ld) * size(%d)",
                            __FILE__, __LINE__, request->u.scatter.my_rank,
                            request->u.scatter.scatter_buf, request->u.scatter.scatter_bytes,
                            request->u.scatter.packed_size, request->u.scatter.size);
    } else {
        request->u.scatter.scatter_bytes=request->u.scatter.packed_size;
        request->u.scatter.scatter_buf = (char *) malloc(request->u.scatter.scatter_bytes);
        if (NULL == request->u.scatter.scatter_buf) {
            ret = OMPI_ERR_OUT_OF_RESOURCE;
            line = __LINE__;
            goto err_hdlr;
        }
        request->u.scatter.free_after = 1;

        opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                            "%s:%d:rank(%d): leaf - scatter_buf(%p) - scatter_bytes(%lu)=packed_size(%ld)",
                            __FILE__, __LINE__, request->u.scatter.my_rank,
                            request->u.scatter.scatter_buf, request->u.scatter.scatter_bytes,
                            request->u.scatter.packed_size);
    }

    return OMPI_SUCCESS;

err_hdlr:
    opal_output(ompi_coll_base_framework.framework_output,
                "%s:%4d:%4d\tError occurred ret=%d, rank %2d",
                __FILE__, __LINE__, line, ret, request->u.scatter.my_rank);

    return ret;
}
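
Note how this pairs with the unpack side in Example No. 21 above: the root lays out one packed block per rank, with block i occupying bytes [packed_size*i, packed_size*(i+1)) of scatter_buf, and each process later unpacks its own block through a matching receive converter (the root reading from its own offset within scatter_buf).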
Example No. 28
0
int
ompi_osc_pt2pt_module_start(ompi_group_t *group,
                            int assert,
                            ompi_win_t *win)
{
    int i, ret = OMPI_SUCCESS;
    ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);

    OBJ_RETAIN(group);
    ompi_group_increment_proc_count(group);

    OPAL_THREAD_LOCK(&(module->p2p_lock));
    if (NULL != module->p2p_sc_group) {
        OPAL_THREAD_UNLOCK(&module->p2p_lock);
        ret = MPI_ERR_RMA_SYNC;
        goto cleanup;
    }
    module->p2p_sc_group = group;    

    /* it's possible we've already received a couple of post messages, so
       add however many we're going to wait for */
    module->p2p_num_post_msgs += ompi_group_size(module->p2p_sc_group);
    OPAL_THREAD_UNLOCK(&(module->p2p_lock));

    memset(module->p2p_sc_remote_active_ranks, 0,
           sizeof(bool) * ompi_comm_size(module->p2p_comm));

    /* for each process in the specified group, find its rank in our
       communicator, store those indexes, and set the true / false in
       the active ranks table */
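    /* note: this nested lookup costs O(group_size * comm_size) pointer
     * comparisons */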
    for (i = 0 ; i < ompi_group_size(group) ; i++) {
        int comm_rank = -1, j;
        
        /* find the rank in the communicator associated with this window */
        for (j = 0 ; j < ompi_comm_size(module->p2p_comm) ; ++j) {
            if (ompi_group_peer_lookup(module->p2p_sc_group, i) ==
                ompi_comm_peer_lookup(module->p2p_comm, j)) {
                comm_rank = j;
                break;
            }
        }
        if (comm_rank == -1) {
            ret = MPI_ERR_RMA_SYNC;
            goto cleanup;
        }

        module->p2p_sc_remote_active_ranks[comm_rank] = true;
        module->p2p_sc_remote_ranks[i] = comm_rank;
    }

    /* Set our mode to access w/ start */
    ompi_win_remove_mode(win, OMPI_WIN_FENCE);
    ompi_win_append_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);

    return OMPI_SUCCESS;

 cleanup:
    ompi_group_decrement_proc_count(group);
    OBJ_RELEASE(group);
    return ret;
}
Example No. 29
0
static inline int ompi_osc_rdma_put_w_req (void *origin_addr, int origin_count,
                                           struct ompi_datatype_t *origin_dt,
                                           int target, OPAL_PTRDIFF_TYPE target_disp,
                                           int target_count, struct ompi_datatype_t *target_dt,
                                           ompi_win_t *win, ompi_osc_rdma_request_t *request)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, target);
    ompi_osc_rdma_frag_t *frag;
    ompi_osc_rdma_header_put_t *header;
    size_t ddt_len, payload_len, frag_len;
    bool is_long_datatype = false;
    bool is_long_msg = false;
    const void *packed_ddt;
    int tag = -1, ret;
    char *ptr;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "put: 0x%lx, %d, %s, %d, %d, %d, %s, %s",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name, win->w_name));

    if (!ompi_osc_rdma_check_access_epoch (module, target)) {
        return OMPI_ERR_RMA_SYNC;
    }

    /* short-circuit case */
    if (0 == origin_count || 0 == target_count) {
        if (request) {
            ompi_osc_rdma_request_complete (request, MPI_SUCCESS);
        }

        return OMPI_SUCCESS;
    }

    /* optimize self communication. TODO: optimize local communication */
    if (ompi_comm_rank (module->comm) == target) {
        return ompi_osc_rdma_put_self (origin_addr, origin_count, origin_dt,
                                       target_disp, target_count, target_dt,
                                       module, request);
    }

    /* Compute datatype and payload lengths.  Note that the datatype description
     * must fit in a single buffer */
    ddt_len = ompi_datatype_pack_description_length(target_dt);
    payload_len = origin_dt->super.size * origin_count;
    frag_len = sizeof(ompi_osc_rdma_header_put_t) + ddt_len + payload_len;

    OPAL_THREAD_LOCK(&module->lock);

    ret = ompi_osc_rdma_frag_alloc(module, target, frag_len, &frag, &ptr);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        frag_len = sizeof(ompi_osc_rdma_header_put_t) + ddt_len;
        ret = ompi_osc_rdma_frag_alloc(module, target, frag_len, &frag, &ptr);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            /* allocate space for the header plus space to store ddt_len */
            frag_len = sizeof(ompi_osc_rdma_header_put_t) + 8;
            ret = ompi_osc_rdma_frag_alloc(module, target, frag_len, &frag, &ptr);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                OPAL_THREAD_UNLOCK(&module->lock);
                return OMPI_ERR_OUT_OF_RESOURCE;
            }

            is_long_datatype = true;
        }

        is_long_msg = true;
        tag = get_tag(module);
    }

    /* flush will be called at the end of this function. make sure the post message has
     * arrived. */
    if ((is_long_msg || request) && module->sc_group) {
        while (0 != module->num_post_msgs) {
            OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                                 "waiting for post messages. num_post_msgs = %d", module->num_post_msgs));
            opal_condition_wait(&module->cond, &module->lock);
        }
    }

    OPAL_THREAD_UNLOCK(&module->lock);

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "osc rdma: put long protocol: %d, large datatype: %d",
                         (int) is_long_msg, (int) is_long_datatype));

    header = (ompi_osc_rdma_header_put_t *) ptr;
    header->base.flags = 0;
    header->len = frag_len;
    header->count = target_count;
    header->displacement = target_disp;
    ptr += sizeof(ompi_osc_rdma_header_put_t);
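    /* eager wire layout: put header, then the packed datatype description
     * (or just its 8-byte length when the datatype is sent separately),
     * then the payload */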

    do {
        ret = ompi_datatype_get_pack_description(target_dt, &packed_ddt);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            break;
        }

        if (is_long_datatype) {
            /* the datatype does not fit in an eager message. send it separately */
            header->base.flags |= OMPI_OSC_RDMA_HDR_FLAG_LARGE_DATATYPE;

            OBJ_RETAIN(target_dt);

            ret = ompi_osc_rdma_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target,
                                            tag, module->comm, ompi_osc_rdma_dt_send_complete,
                                            target_dt);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                break;
            }

            *((uint64_t *) ptr) = ddt_len;
            ptr += 8;
        } else {
            memcpy((unsigned char*) ptr, packed_ddt, ddt_len);
            ptr += ddt_len;
        }

        if (!is_long_msg) {
            header->base.type = OMPI_OSC_RDMA_HDR_TYPE_PUT;

            osc_rdma_copy_for_send (ptr, payload_len, origin_addr, proc, origin_count,
                                    origin_dt);

            /* the user's buffer is no longer needed so mark the request as
             * complete. */
            if (request) {
                ompi_osc_rdma_request_complete (request, MPI_SUCCESS);
            }
        } else {
            header->base.type = OMPI_OSC_RDMA_HDR_TYPE_PUT_LONG;

            header->tag = tag;

            /* increase the outgoing signal count */
            ompi_osc_signal_outgoing (module, target, 1);

            if (request) {
                request->outstanding_requests = 1;
                ret = ompi_osc_rdma_isend_w_cb (origin_addr, origin_count, origin_dt,
                                                target, tag, module->comm, ompi_osc_rdma_req_comm_complete,
                                                request);
            } else {
                ret = ompi_osc_rdma_component_isend (module, origin_addr, origin_count, origin_dt, target, tag,
                                                     module->comm);
            }
        }
    } while (0);

    if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
        header->base.flags |= OMPI_OSC_RDMA_HDR_FLAG_VALID;
    }

    OPAL_THREAD_LOCK(&module->lock);

    ret = ompi_osc_rdma_frag_finish(module, frag);

    if (request || is_long_msg) {
        /* need to flush now in case the caller decides to wait on the request */
        ompi_osc_rdma_frag_flush_target (module, target);
    }
    OPAL_THREAD_UNLOCK(&module->lock);

    return ret;
}
Example No. 30
0
int mca_pml_ob1_send(void *buf,
                     size_t count,
                     ompi_datatype_t * datatype,
                     int dst,
                     int tag,
                     mca_pml_base_send_mode_t sendmode,
                     ompi_communicator_t * comm)
{
    mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm;
    ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst);
    mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
                                        dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
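    /* the send request lives on the stack: this blocking path constructs,
     * starts, waits on, and destructs it before returning, so no free-list
     * allocation is needed */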
    mca_pml_ob1_send_request_t *sendreq =
        alloca(mca_pml_base_send_requests.fl_frag_size);
    int16_t seqn;
    int rc;

    if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == sendmode)) {
        /* large buffered sends *need* a real request so use isend instead */
        ompi_request_t *brequest;

        rc = mca_pml_ob1_isend (buf, count, datatype, dst, tag, sendmode, comm, &brequest);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            return rc;
        }

        /* free the request and return. don't care if it completes now */
        ompi_request_free (&brequest);
        return OMPI_SUCCESS;
    }

    if (OPAL_UNLIKELY(NULL == endpoint)) {
        return OMPI_ERR_UNREACH;
    }

    seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1);

    if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
        rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc,
                                      endpoint, comm);
        if (OPAL_LIKELY(0 <= rc)) {
            return OMPI_SUCCESS;
        }
    }

    OBJ_CONSTRUCT(sendreq, mca_pml_ob1_send_request_t);
    sendreq->req_send.req_base.req_proc = dst_proc;
    sendreq->src_des = NULL;

    MCA_PML_OB1_SEND_REQUEST_INIT(sendreq,
                                  buf,
                                  count,
                                  datatype,
                                  dst, tag,
                                  comm, sendmode, false);

    PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
                             &sendreq->req_send.req_base,
                             PERUSE_SEND);

    MCA_PML_OB1_SEND_REQUEST_START_W_SEQ(sendreq, endpoint, seqn, rc);
    if (rc != OMPI_SUCCESS) {
        return rc;
    }

    ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi);

    rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR;
    MCA_PML_BASE_SEND_REQUEST_FINI(&sendreq->req_send);
    OBJ_DESTRUCT(sendreq);

    return rc;
}
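
For context, mca_pml_ob1_send is the blocking-send entry point of the ob1 PML, i.e. the path a plain MPI_Send takes when ob1 is the selected PML. A minimal user-level program that exercises it (hypothetical ranks and tag, chosen purely for illustration):

/* send_recv.c - compile: mpicc send_recv.c -o send_recv; run: mpirun -n 2 ./send_recv */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, buf[4] = {0, 1, 2, 3};

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (0 == rank) {
        /* this call lands in mca_pml_ob1_send when the ob1 PML is active */
        MPI_Send(buf, 4, MPI_INT, 1, 42, MPI_COMM_WORLD);
    } else if (1 == rank) {
        MPI_Recv(buf, 4, MPI_INT, 0, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("rank 1 received %d %d %d %d\n", buf[0], buf[1], buf[2], buf[3]);
    }

    MPI_Finalize();
    return 0;
}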
}