예제 #1
0
/**
 * Start a one-sided put of a contiguous origin buffer into a target window.
 *
 * Only contiguous origin and target layouts are handled; anything else is
 * logged and rejected.
 *
 * @param origin_addr   start of the local source buffer
 * @param origin_count  number of origin_dt elements to send
 * @param origin_dt     datatype of the origin buffer
 * @param target        rank passed to ompi_osc_portals4_get_peer()
 * @param target_disp   displacement at the target; multiplied by the value
 *                      get_displacement() returns for that target
 * @param target_count  number of target_dt elements (layout check only)
 * @param target_dt     datatype at the target (layout check only)
 * @param win           window whose w_osc_module is the portals4 module
 *
 * @return OMPI_SUCCESS once PtlPut() accepted the operation,
 *         OMPI_ERR_NOT_SUPPORTED for non-contiguous layouts, otherwise the
 *         code returned by the failing call.
 */
int
ompi_osc_portals4_put(void *origin_addr,
                      int origin_count,
                      struct ompi_datatype_t *origin_dt,
                      int target,
                      OPAL_PTRDIFF_TYPE target_disp,
                      int target_count,
                      struct ompi_datatype_t *target_dt,
                      struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_handle_md_t md_h;
    void *md_base;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Put: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    }

    ret = ompi_datatype_type_size(origin_dt, &length);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }
    length *= origin_count;

    ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);

    /* Count the operation only once nothing but the Portals call itself can
     * still fail; an early return must not leave opcount ahead of the number
     * of operations that were really started. */
    (void)opal_atomic_add_64(&module->opcount, 1);

    ret = PtlPut(md_h,
                 (ptl_size_t) ((char*) origin_addr - (char*) md_base),
                 length,
                 PTL_ACK_REQ,
                 peer,
                 module->pt_idx,
                 module->match_bits,
                 offset,
                 NULL,
                 0);
    if (OMPI_SUCCESS != ret) {
        /* the put was never started, so take it back out of the count */
        (void)opal_atomic_add_64(&module->opcount, -1);
        return ret;
    }

    return OMPI_SUCCESS;
}
예제 #2
0
/**
 * Finish connecting an endpoint once the remote attributes are available.
 *
 * Initializes the SMSG channel from the local mailbox and the remote
 * attributes, tags both GNI endpoints with the local/remote indices, marks
 * the endpoint connected and then tries to flush the queued sends.
 *
 * @param ep  endpoint being connected
 *
 * @return OPAL_SUCCESS, or the OPAL translation of the GNI error if
 *         GNI_SmsgInit() fails. A failure to flush the queued sends is not
 *         reported; the endpoint is parked on the BTL's wait list instead.
 */
static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
    gni_return_t smsg_status;
    int flush_rc;

    BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d",
                 ep->remote_attr.smsg_attr.msg_type,
                 ep->remote_attr.smsg_attr.msg_buffer,
                 ep->remote_attr.smsg_attr.buff_size,
                 ep->remote_attr.smsg_attr.mem_hndl.qword1,
                 ep->remote_attr.smsg_attr.mem_hndl.qword2,
                 ep->remote_attr.smsg_attr.mbox_offset,
                 ep->remote_attr.smsg_attr.mbox_maxcredit,
                 ep->remote_attr.smsg_attr.msg_maxsize));

    BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d",
                 ep->mailbox->attr.smsg_attr.msg_type,
                 ep->mailbox->attr.smsg_attr.msg_buffer,
                 ep->mailbox->attr.smsg_attr.buff_size,
                 ep->mailbox->attr.smsg_attr.mem_hndl.qword1,
                 ep->mailbox->attr.smsg_attr.mem_hndl.qword2,
                 ep->mailbox->attr.smsg_attr.mbox_offset,
                 ep->mailbox->attr.smsg_attr.mbox_maxcredit,
                 ep->mailbox->attr.smsg_attr.msg_maxsize));

    smsg_status = GNI_SmsgInit (ep->smsg_ep_handle,
                                &ep->mailbox->attr.smsg_attr,
                                &ep->remote_attr.smsg_attr);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != smsg_status)) {
        BTL_ERROR(("error initializing SMSG protocol. rc = %d", smsg_status));

        return opal_common_rc_ugni_to_opal (smsg_status);
    }

    /* Local event data carries our index, remote event data carries our
     * index in the peer's endpoint array, so a completion resolves to an
     * endpoint with one array lookup. The peer's index must stay fixed from
     * here on. */
    GNI_EpSetEventData (ep->rdma_ep_handle, ep->index, ep->remote_attr.index);
    GNI_EpSetEventData (ep->smsg_ep_handle, ep->index, ep->remote_attr.index);

    ep->rmt_irq_mem_hndl = ep->remote_attr.rmt_irq_mem_hndl;
    ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
    (void) opal_atomic_add_64 (&ep->btl->connected_peer_count, 1);

    /* push out whatever was queued while the connection was being set up */
    BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));

    flush_rc = mca_btl_ugni_progress_send_wait_list (ep);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != flush_rc)) {
        /* not everything went out: queue the endpoint (once) for a retry */
        OPAL_THREAD_LOCK(&ep->btl->ep_wait_list_lock);
        if (!ep->wait_listed) {
            opal_list_append (&ep->btl->ep_wait_list, &ep->super);
            ep->wait_listed = true;
        }
        OPAL_THREAD_UNLOCK(&ep->btl->ep_wait_list_lock);
    }

    return OPAL_SUCCESS;
}
예제 #3
0
/*
 * Worker body for the threaded part of the test: performs nreps rounds of
 * "add 5" on each shared counter through the atomic add primitives.
 *
 * arg carries the thread's rank packed into the pointer value; the return
 * value is (rank + 1000) packed the same way.
 */
static void *thread_main(void *arg)
{
    const int rank = (int) (unsigned long) arg;
    int round = 0;

    while (round < nreps) {
        opal_atomic_add_32(&val32, 5);
#if OPAL_HAVE_ATOMIC_MATH_64
        opal_atomic_add_64(&val64, 5);
#endif
        opal_atomic_add(&valint, 5);
        ++round;
    }

    return (void *) (unsigned long) (rank + 1000);
}
예제 #4
0
/**
 * Tear down an endpoint and put it back into the INIT state.
 *
 * @param ep               endpoint to disconnect
 * @param send_disconnect  when true and the endpoint is connected, first send
 *                         an SMSG message with no header or payload tagged
 *                         MCA_BTL_UGNI_TAG_DISCONNECT to the peer
 *
 * @return OPAL_SUCCESS in all cases; a failure to send the disconnect message
 *         is only logged.
 */
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect) {
    gni_return_t rc;

    if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
        /* nothing to do */
        return OPAL_SUCCESS;
    }

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
        OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
        rc = GNI_SmsgSendWTag (ep->smsg_ep_handle, NULL, 0, NULL, 0, -1,
                               MCA_BTL_UGNI_TAG_DISCONNECT);
        OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
        if (GNI_RC_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni could not send close message"));
        }

        /* we might want to wait for local completion here (do we even care), yes we do */
        /* TODO: FIX FIX FIX */

    }

    /* TODO: FIX GROSS */
    OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
    (void) opal_common_ugni_ep_destroy (&ep->smsg_ep_handle);
    (void) opal_common_ugni_ep_destroy (&ep->rdma_ep_handle);
    OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);

    /* hand the mailbox back to the BTL's free list */
    if (ep->mailbox) {
        opal_free_list_return (&ep->btl->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
        ep->mailbox = NULL;
    }

    ep->state = MCA_BTL_UGNI_EP_STATE_INIT;
    /* Undo the single increment made when the connection was finished; the
     * counter moves by exactly one per endpoint.
     * NOTE(review): this also runs for endpoints that left INIT but never
     * reached CONNECTED - confirm that is intended. */
    (void) opal_atomic_add_64 (&ep->btl->connected_peer_count, -1);

    return OPAL_SUCCESS;
}
예제 #5
0
/*
 * Test driver for the atomic compare-and-set and add primitives.
 *
 * Usage: <program> <nthreads>
 *
 * Runs the single-threaded checks first, then (when POSIX threads are
 * available) starts <nthreads> workers running thread_main() and verifies
 * the shared counters afterwards.
 *
 * Returns 0 when every assertion held, and 77 (with a "Skipping test"
 * message) when the command line is unusable: wrong number of arguments or a
 * thread count below 1.
 */
int main(int argc, char *argv[])
{
#if OPAL_HAVE_POSIX_THREADS
    int tid;
    pthread_t *th;
#endif

    if (argc != 2) {
        printf("*** Incorrect number of arguments.  Skipping test\n");
        return 77;
    }
    nthreads = atoi(argv[1]);
    if (nthreads < 1) {
        /* atoi() yields 0 for non-numeric input, and a zero or negative
         * count would turn into a zero-sized or enormous malloc() below */
        printf("*** Invalid number of threads.  Skipping test\n");
        return 77;
    }


    /* first test single-threaded functionality */

    /* Each cmpset group below follows the same pattern: a swap whose
     * expected old value matches must succeed and store the new value, a
     * swap whose expected old value differs must fail and leave the
     * variable untouched. */

    /* -- cmpset 32-bit tests -- */

    vol32 = 42, old32 = 42, new32 = 50;
    assert(opal_atomic_cmpset_32(&vol32, old32, new32) == 1);
    opal_atomic_rmb();
    assert(vol32 == new32);

    vol32 = 42, old32 = 420, new32 = 50;
    assert(opal_atomic_cmpset_32(&vol32, old32, new32) == 0);
    opal_atomic_rmb();
    assert(vol32 == 42);

    vol32 = 42, old32 = 42, new32 = 50;
    assert(opal_atomic_cmpset_acq_32(&vol32, old32, new32) == 1);
    assert(vol32 == new32);

    vol32 = 42, old32 = 420, new32 = 50;
    assert(opal_atomic_cmpset_acq_32(&vol32, old32, new32) == 0);
    assert(vol32 == 42);

    vol32 = 42, old32 = 42, new32 = 50;
    assert(opal_atomic_cmpset_rel_32(&vol32, old32, new32) == 1);
    opal_atomic_rmb();
    assert(vol32 == new32);

    vol32 = 42, old32 = 420, new32 = 50;
    assert(opal_atomic_cmpset_rel_32(&vol32, old32, new32) == 0);
    opal_atomic_rmb();
    assert(vol32 == 42);

    /* -- cmpset 64-bit tests -- */

#if OPAL_HAVE_ATOMIC_MATH_64
    vol64 = 42, old64 = 42, new64 = 50;
    assert(1 == opal_atomic_cmpset_64(&vol64, old64, new64));
    opal_atomic_rmb();
    assert(new64 == vol64);

    vol64 = 42, old64 = 420, new64 = 50;
    assert(opal_atomic_cmpset_64(&vol64, old64, new64) == 0);
    opal_atomic_rmb();
    assert(vol64 == 42);

    vol64 = 42, old64 = 42, new64 = 50;
    assert(opal_atomic_cmpset_acq_64(&vol64, old64, new64) == 1);
    assert(vol64 == new64);

    vol64 = 42, old64 = 420, new64 = 50;
    assert(opal_atomic_cmpset_acq_64(&vol64, old64, new64) == 0);
    assert(vol64 == 42);

    vol64 = 42, old64 = 42, new64 = 50;
    assert(opal_atomic_cmpset_rel_64(&vol64, old64, new64) == 1);
    opal_atomic_rmb();
    assert(vol64 == new64);

    vol64 = 42, old64 = 420, new64 = 50;
    assert(opal_atomic_cmpset_rel_64(&vol64, old64, new64) == 0);
    opal_atomic_rmb();
    assert(vol64 == 42);
#endif
    /* -- cmpset int tests -- */

    volint = 42, oldint = 42, newint = 50;
    assert(opal_atomic_cmpset(&volint, oldint, newint) == 1);
    opal_atomic_rmb();
    assert(volint == newint);

    volint = 42, oldint = 420, newint = 50;
    assert(opal_atomic_cmpset(&volint, oldint, newint) == 0);
    opal_atomic_rmb();
    assert(volint == 42);

    volint = 42, oldint = 42, newint = 50;
    assert(opal_atomic_cmpset_acq(&volint, oldint, newint) == 1);
    assert(volint == newint);

    volint = 42, oldint = 420, newint = 50;
    assert(opal_atomic_cmpset_acq(&volint, oldint, newint) == 0);
    assert(volint == 42);

    volint = 42, oldint = 42, newint = 50;
    assert(opal_atomic_cmpset_rel(&volint, oldint, newint) == 1);
    opal_atomic_rmb();
    assert(volint == newint);

    volint = 42, oldint = 420, newint = 50;
    assert(opal_atomic_cmpset_rel(&volint, oldint, newint) == 0);
    opal_atomic_rmb();
    assert(volint == 42);


    /* -- cmpset ptr tests -- */

    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    assert(opal_atomic_cmpset_ptr(&volptr, oldptr, newptr) == 1);
    opal_atomic_rmb();
    assert(volptr == newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    assert(opal_atomic_cmpset_ptr(&volptr, oldptr, newptr) == 0);
    opal_atomic_rmb();
    assert(volptr == (void *) 42);

    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    assert(opal_atomic_cmpset_acq_ptr(&volptr, oldptr, newptr) == 1);
    assert(volptr == newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    assert(opal_atomic_cmpset_acq_ptr(&volptr, oldptr, newptr) == 0);
    assert(volptr == (void *) 42);

    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    assert(opal_atomic_cmpset_rel_ptr(&volptr, oldptr, newptr) == 1);
    opal_atomic_rmb();
    assert(volptr == newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    assert(opal_atomic_cmpset_rel_ptr(&volptr, oldptr, newptr) == 0);
    opal_atomic_rmb();
    assert(volptr == (void *) 42);

    /* -- add_32 tests -- */

    val32 = 42;
    assert(opal_atomic_add_32(&val32, 5) == (42 + 5));
    opal_atomic_rmb();
    assert((42 + 5) == val32);

    /* -- add_64 tests -- */
#if OPAL_HAVE_ATOMIC_MATH_64
    val64 = 42;
    assert(opal_atomic_add_64(&val64, 5) == (42 + 5));
    opal_atomic_rmb();
    assert((42 + 5) == val64);
#endif
    /* -- add_int tests -- */

    valint = 42;
    opal_atomic_add(&valint, 5);
    opal_atomic_rmb();
    assert((42 + 5) == valint);


    /* threaded tests */

    /* reset the shared counters the workers add to */
    val32 = 0;
#if OPAL_HAVE_ATOMIC_MATH_64
    val64 = 0ul;
#endif
    valint = 0;

    /* -- create the thread set -- */
#if OPAL_HAVE_POSIX_THREADS
    th = (pthread_t *) malloc(nthreads * sizeof(pthread_t));
    if (!th) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    for (tid = 0; tid < nthreads; tid++) {
        /* the thread index travels to the worker packed into the pointer */
        if (pthread_create(&th[tid], NULL, thread_main, (void *) (unsigned long) tid) != 0) {
            perror("pthread_create");
            exit(EXIT_FAILURE);
        }
    }

    /* -- wait for the thread set to finish -- */

    for (tid = 0; tid < nthreads; tid++) {
        void *thread_return;

        if (pthread_join(th[tid], &thread_return) != 0) {
            perror("pthread_join");
            exit(EXIT_FAILURE);
        }
    }
    free(th);

    /* every worker added 5 to each counter nreps times */
    opal_atomic_rmb();
    assert((5 * nthreads * nreps) == val32);
#if OPAL_HAVE_ATOMIC_MATH_64
    opal_atomic_rmb();
    assert((5 * nthreads * nreps) == val64);
#endif
    opal_atomic_rmb();
    assert((5 * nthreads * nreps) == valint);
#endif

    return 0;
}
예제 #6
0
/**
 * Start an atomic compare-and-swap of one element on a target window.
 *
 * The operation is issued as a PtlSwap() with PTL_CSWAP: compare_addr is the
 * operand, origin_addr supplies the replacement value and result_addr
 * receives the fetched value. Both buffers are addressed as offsets into
 * module->md_h.
 *
 * @param origin_addr   address of the replacement value
 * @param compare_addr  address of the value to compare against
 * @param result_addr   address that receives the fetched target value
 * @param dt            datatype of the element; must translate to a Portals
 *                      datatype and must not be larger than
 *                      module->fetch_atomic_max (asserted)
 * @param target        rank passed to ompi_osc_portals4_get_peer()
 * @param target_disp   displacement at the target; multiplied by the value
 *                      get_displacement() returns for that target
 * @param win           window whose w_osc_module is the portals4 module
 *
 * @return OMPI_SUCCESS once PtlSwap() accepted the operation, otherwise the
 *         code returned by the failing call.
 */
int
ompi_osc_portals4_compare_and_swap(const void *origin_addr,
                                   const void *compare_addr,
                                   void *result_addr,
                                   struct ompi_datatype_t *dt,
                                   int target,
                                   OPAL_PTRDIFF_TYPE target_disp,
                                   struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_datatype_t ptl_dt;
    ptl_size_t result_md_offset, origin_md_offset;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, 0x%lx",
                         (unsigned long) origin_addr,
                         (unsigned long) compare_addr,
                         (unsigned long) result_addr,
                         dt->name, target, (int) target_disp,
                         (unsigned long) win));

    ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
    if (OMPI_SUCCESS != ret) return ret;

    offset = get_displacement(module, target) * target_disp;

    ret = ompi_datatype_type_size(dt, &length);
    if (OMPI_SUCCESS != ret) return ret;

    assert(length <= module->fetch_atomic_max);

    result_md_offset = (ptl_size_t) result_addr;
    origin_md_offset = (ptl_size_t) origin_addr;

    (void)opal_atomic_add_64(&module->opcount, 1);

    ret = PtlSwap(module->md_h,
                  result_md_offset,
                  module->md_h,
                  origin_md_offset,
                  length,
                  peer,
                  module->pt_idx,
                  module->match_bits,
                  offset,
                  NULL,
                  0,
                  compare_addr,
                  PTL_CSWAP,
                  ptl_dt);
    if (OMPI_SUCCESS != ret) {
        /* the swap was never started, so take it back out of the count */
        (void)opal_atomic_add_64(&module->opcount, -1);
        return ret;
    }

    return OMPI_SUCCESS;
}
예제 #7
0
/**
 * Start a get-accumulate: fetch the target data into result_addr and combine
 * the origin data into the target with op.
 *
 * The transfer is cut into fragments of at most module->fetch_atomic_max
 * bytes. Depending on op it is issued as
 *   - MPI_REPLACE: PtlSwap() with PTL_SWAP,
 *   - MPI_NO_OP:   PtlGet() (length taken from target_dt / target_count),
 *   - otherwise:   PtlFetchAtomic() with the translated op and datatype.
 * Buffers are addressed as offsets into module->md_h. Buffers with a
 * positive count must be contiguous.
 *
 * @param origin_addr   local data to combine into the target
 * @param origin_count  number of origin_dt elements
 * @param origin_dt     datatype of the origin buffer
 * @param result_addr   local buffer receiving the fetched target data
 * @param result_count  number of result_dt elements (layout check only)
 * @param result_dt     datatype of the result buffer (layout check only)
 * @param target        rank passed to ompi_osc_portals4_get_peer()
 * @param target_disp   displacement at the target; multiplied by the value
 *                      get_displacement() returns for that target
 * @param target_count  number of target_dt elements
 * @param target_dt     datatype at the target
 * @param op            reduction operation
 * @param win           window whose w_osc_module is the portals4 module
 *
 * @return OMPI_SUCCESS when every fragment was accepted,
 *         OMPI_ERR_NOT_SUPPORTED for non-contiguous layouts, otherwise the
 *         code of the first failing call. Fragments issued before a failure
 *         are not recalled.
 */
int
ompi_osc_portals4_get_accumulate(const void *origin_addr,
                                 int origin_count,
                                 struct ompi_datatype_t *origin_dt,
                                 void *result_addr,
                                 int result_count,
                                 struct ompi_datatype_t *result_dt,
                                 int target,
                                 MPI_Aint target_disp,
                                 int target_count,
                                 struct ompi_datatype_t *target_dt,
                                 struct ompi_op_t *op,
                                 struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, (unsigned long) result_addr,
                         result_count, result_dt->name,
                         target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    offset = get_displacement(module, target) * target_disp;

    /* we don't support non-contiguous buffers.  but if the count is 0, we don't care if buffer is non-contiguous. */
    if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
        (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count)) ||
        (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count))) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Get_accumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    }

    sent = 0;
    if (MPI_REPLACE == op) {
        ptl_size_t result_md_offset, origin_md_offset;

        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
        ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
        length *= origin_count;

        result_md_offset = (ptl_size_t) result_addr;
        origin_md_offset = (ptl_size_t) origin_addr;

        do {
            size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

            (void)opal_atomic_add_64(&module->opcount, 1);

            ret = PtlSwap(module->md_h,
                          result_md_offset + sent,
                          module->md_h,
                          origin_md_offset + sent,
                          msg_length,
                          peer,
                          module->pt_idx,
                          module->match_bits,
                          offset + sent,
                          NULL,
                          0,
                          NULL,
                          PTL_SWAP,
                          ptl_dt);
            if (OMPI_SUCCESS != ret) {
                /* stop at the first rejected fragment instead of letting a
                 * later success hide it; the fragment was never started, so
                 * take it back out of the count */
                (void)opal_atomic_add_64(&module->opcount, -1);
                return ret;
            }
            sent += msg_length;
        } while (sent < length);
    } else if (MPI_NO_OP == op) {
        ptl_size_t md_offset;

        /* nothing is written at the target, so the size comes from the
         * target description */
        ret = ompi_datatype_type_size(target_dt, &length);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
        length *= target_count;

        md_offset = (ptl_size_t) result_addr;

        do {
            size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

            (void)opal_atomic_add_64(&module->opcount, 1);

            ret = PtlGet(module->md_h,
                         md_offset + sent,
                         msg_length,
                         peer,
                         module->pt_idx,
                         module->match_bits,
                         offset + sent,
                         NULL);
            if (OMPI_SUCCESS != ret) {
                (void)opal_atomic_add_64(&module->opcount, -1);
                return ret;
            }
            sent += msg_length;
        } while (sent < length);
    } else {
        ptl_size_t result_md_offset, origin_md_offset;

        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
        length *= origin_count;

        result_md_offset = (ptl_size_t) result_addr;
        origin_md_offset = (ptl_size_t) origin_addr;

        ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
        if (OMPI_SUCCESS != ret) return ret;

        ret = ompi_osc_portals4_get_op(op, &ptl_op);
        if (OMPI_SUCCESS != ret) return ret;

        do {
            size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

            (void)opal_atomic_add_64(&module->opcount, 1);

            ret = PtlFetchAtomic(module->md_h,
                                 result_md_offset + sent,
                                 module->md_h,
                                 origin_md_offset + sent,
                                 msg_length,
                                 peer,
                                 module->pt_idx,
                                 module->match_bits,
                                 offset + sent,
                                 NULL,
                                 0,
                                 ptl_op,
                                 ptl_dt);
            if (OMPI_SUCCESS != ret) {
                (void)opal_atomic_add_64(&module->opcount, -1);
                return ret;
            }
            sent += msg_length;
        } while (sent < length);
    }

    return OMPI_SUCCESS;
}
예제 #8
0
/**
 * Start an accumulate of a contiguous origin buffer into a target window.
 *
 * The transfer is cut into fragments of at most module->atomic_max bytes.
 * MPI_REPLACE is issued as PtlPut(); every other op is issued as PtlAtomic()
 * with the translated op and datatype. The origin buffer is addressed as an
 * offset into module->md_h.
 *
 * @param origin_addr   local data to combine into the target
 * @param origin_count  number of origin_dt elements
 * @param origin_dt     datatype of the origin buffer
 * @param target        rank passed to ompi_osc_portals4_get_peer()
 * @param target_disp   displacement at the target; multiplied by the value
 *                      get_displacement() returns for that target
 * @param target_count  number of target_dt elements (layout check only)
 * @param target_dt     datatype at the target (layout check only)
 * @param op            reduction operation
 * @param win           window whose w_osc_module is the portals4 module
 *
 * @return OMPI_SUCCESS when every fragment was accepted,
 *         OMPI_ERR_NOT_SUPPORTED for non-contiguous layouts, otherwise the
 *         code of the first failing call. Fragments issued before a failure
 *         are not recalled.
 */
int
ompi_osc_portals4_accumulate(const void *origin_addr,
                             int origin_count,
                             struct ompi_datatype_t *origin_dt,
                             int target,
                             OPAL_PTRDIFF_TYPE target_disp,
                             int target_count,
                             struct ompi_datatype_t *target_dt,
                             struct ompi_op_t *op,
                             struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_size_t md_offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Accumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    }

    ret = ompi_datatype_type_size(origin_dt, &length);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }
    length *= origin_count;

    /* The Portals op and datatype are the same for every fragment. Resolve
     * them once, before anything is counted in opcount, so a translation
     * failure cannot leave a phantom operation behind. */
    if (MPI_REPLACE != op) {
        ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
        if (OMPI_SUCCESS != ret) return ret;

        ret = ompi_osc_portals4_get_op(op, &ptl_op);
        if (OMPI_SUCCESS != ret) return ret;
    }

    md_offset = (ptl_size_t) origin_addr;
    sent = 0;

    do {
        size_t msg_length = MIN(module->atomic_max, length - sent);

        (void)opal_atomic_add_64(&module->opcount, 1);

        if (MPI_REPLACE == op) {
            ret = PtlPut(module->md_h,
                         md_offset + sent,
                         msg_length,
                         PTL_ACK_REQ,
                         peer,
                         module->pt_idx,
                         module->match_bits,
                         offset + sent,
                         NULL,
                         0);
        } else {
            ret = PtlAtomic(module->md_h,
                            md_offset + sent,
                            msg_length,
                            PTL_ACK_REQ,
                            peer,
                            module->pt_idx,
                            module->match_bits,
                            offset + sent,
                            NULL,
                            0,
                            ptl_op,
                            ptl_dt);
        }
        if (OMPI_SUCCESS != ret) {
            /* the fragment was never started, so take it back out of the
             * count */
            (void)opal_atomic_add_64(&module->opcount, -1);
            return ret;
        }
        sent += msg_length;
    } while (sent < length);

    return OMPI_SUCCESS;
}
예제 #9
0
/**
 * Request-based get-accumulate: like ompi_osc_portals4_get_accumulate(), but
 * a request is allocated, handed back through ompi_req, and passed to every
 * Portals call as its user pointer.
 *
 * The transfer is cut into fragments of at most module->fetch_atomic_max
 * bytes; request->ops_expected is incremented once per fragment. Depending
 * on op the fragments are issued as
 *   - MPI_REPLACE: PtlSwap() with PTL_SWAP,
 *   - MPI_NO_OP:   PtlGet() (length taken from target_dt / target_count),
 *   - otherwise:   PtlFetchAtomic() with the translated op and datatype.
 * The result side uses module->req_md_h, the origin side module->md_h.
 *
 * @param origin_addr   local data to combine into the target
 * @param origin_count  number of origin_dt elements
 * @param origin_dt     datatype of the origin buffer
 * @param result_addr   local buffer receiving the fetched target data
 * @param result_count  number of result_dt elements (layout check only)
 * @param result_dt     datatype of the result buffer (layout check only)
 * @param target        rank passed to ompi_osc_portals4_get_peer()
 * @param target_disp   displacement at the target; multiplied by the value
 *                      get_displacement() returns for that target
 * @param target_count  number of target_dt elements
 * @param target_dt     datatype at the target
 * @param op            reduction operation
 * @param win           window whose w_osc_module is the portals4 module
 * @param ompi_req      receives the address of the allocated request; it is
 *                      written before any check, so it must not be used when
 *                      an error is returned
 *
 * @return OMPI_SUCCESS, OMPI_ERR_TEMP_OUT_OF_RESOURCE when no request could
 *         be allocated, OMPI_ERR_NOT_SUPPORTED for non-contiguous layouts,
 *         otherwise the code of the failing call. On every error path the
 *         request is handed back with OMPI_OSC_PORTALS4_REQUEST_RETURN().
 */
int
ompi_osc_portals4_rget_accumulate(const void *origin_addr,
                                  int origin_count,
                                  struct ompi_datatype_t *origin_dt,
                                  void *result_addr,
                                  int result_count,
                                  struct ompi_datatype_t *result_dt,
                                  int target,
                                  MPI_Aint target_disp,
                                  int target_count,
                                  struct ompi_datatype_t *target_dt,
                                  struct ompi_op_t *op,
                                  struct ompi_win_t *win,
                                  struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_portals4_request_t *request;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, (unsigned long) result_addr,
                         result_count, result_dt->name,
                         target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Rget_accumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        sent = 0;

        /* NOTE(review): in the three fragment loops below, ret is
         * overwritten by every fragment and only the last value is checked
         * after the loop, so a rejected middle fragment goes unnoticed.
         * Stopping early would need a decision on what to do with the
         * request while earlier fragments are still in flight. */
        if (MPI_REPLACE == op) {
            ptl_size_t result_md_offset, origin_md_offset;

            ret = ompi_datatype_type_size(origin_dt, &length);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }
            ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }
            length *= origin_count;

            result_md_offset = (ptl_size_t) result_addr;
            origin_md_offset = (ptl_size_t) origin_addr;

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);
                request->ops_expected++;

                ret = PtlSwap(module->req_md_h,
                              result_md_offset + sent,
                              module->md_h,
                              origin_md_offset + sent,
                              msg_length,
                              peer,
                              module->pt_idx,
                              module->match_bits,
                              offset + sent,
                              request,
                              0,
                              NULL,
                              PTL_SWAP,
                              ptl_dt);
                sent += msg_length;
            } while (sent < length);
        } else if (MPI_NO_OP == op) {
            ptl_size_t md_offset;

            /* nothing is written at the target, so the size comes from the
             * target description */
            ret = ompi_datatype_type_size(target_dt, &length);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }
            length *= target_count;

            md_offset = (ptl_size_t) result_addr;

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);
                request->ops_expected++;

                ret = PtlGet(module->req_md_h,
                             md_offset + sent,
                             msg_length,
                             peer,
                             module->pt_idx,
                             module->match_bits,
                             offset + sent,
                             request);
                sent += msg_length;
            } while (sent < length);
        } else {
            ptl_size_t result_md_offset, origin_md_offset;

            ret = ompi_datatype_type_size(origin_dt, &length);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }
            length *= origin_count;

            result_md_offset = (ptl_size_t) result_addr;
            origin_md_offset = (ptl_size_t) origin_addr;

            /* nothing has been issued yet, so the request can simply be
             * handed back when the datatype or op cannot be translated */
            ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }

            ret = ompi_osc_portals4_get_op(op, &ptl_op);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);
                request->ops_expected++;

                ret = PtlFetchAtomic(module->req_md_h,
                                     result_md_offset + sent,
                                     module->md_h,
                                     origin_md_offset + sent,
                                     msg_length,
                                     peer,
                                     module->pt_idx,
                                     module->match_bits,
                                     offset + sent,
                                     request,
                                     0,
                                     ptl_op,
                                     ptl_dt);
                sent += msg_length;
            } while (sent < length);
        }
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
예제 #10
0
int
ompi_osc_portals4_rget(void *origin_addr,
                       int origin_count,
                       struct ompi_datatype_t *origin_dt,
                       int target,
                       OPAL_PTRDIFF_TYPE target_disp,
                       int target_count,
                       struct ompi_datatype_t *target_dt,
                       struct ompi_win_t *win,
                       struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_portals4_request_t *request;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Rget: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        (void)opal_atomic_add_64(&module->opcount, 1);
        request->ops_expected = 1;
        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
        length *= origin_count;
        ret = PtlGet(module->req_md_h,
                     (ptl_size_t) origin_addr,
                     length,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     request);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
예제 #11
0
/*
 * One-sided fetch-and-op on a single element of datatype `dt` located at
 * displacement target_disp in the window of rank `target`.
 *
 * The previous target value is fetched into result_addr.  Depending on
 * `op` the operation is issued as:
 *   - MPI_REPLACE : PtlSwap with PTL_SWAP (origin value replaces target),
 *   - MPI_NO_OP   : PtlGet (pure fetch, origin_addr is not used),
 *   - otherwise   : PtlFetchAtomic with the op translated by
 *                   ompi_osc_portals4_get_op().
 *
 * Returns OMPI_SUCCESS when the operation has been issued, otherwise the
 * error from the datatype/op translation, ompi_datatype_type_size(), or
 * the Portals call.  module->opcount is left at its value on entry on
 * every failure path.
 */
int
ompi_osc_portals4_fetch_and_op(void *origin_addr,
                               void *result_addr,
                               struct ompi_datatype_t *dt,
                               int target,
                               OPAL_PTRDIFF_TYPE target_disp,
                               struct ompi_op_t *op,
                               struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;
    ptl_handle_md_t result_md_h;
    void *result_md_base;
    ptl_size_t result_md_offset;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, 
                         (unsigned long) result_addr,
                         dt->name, target, (int) target_disp,
                         op->o_name,
                         (unsigned long) win));

    ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
    if (OMPI_SUCCESS != ret) return ret;

    /* Byte offset at the target: displacement unit times target_disp. */
    offset = get_displacement(module, target) * target_disp;

    ret = ompi_datatype_type_size(dt, &length);
    if (OMPI_SUCCESS != ret) return ret;

    /* A single element must fit in one atomic message; a size equal to
       the limit is still a legal message size. */
    assert(length <= module->fetch_atomic_max);

    /* Translate the reduction op before counting the operation, so a
       translation failure leaves opcount untouched. */
    if (MPI_REPLACE != op && MPI_NO_OP != op) {
        ret = ompi_osc_portals4_get_op(op, &ptl_op);
        if (OMPI_SUCCESS != ret) return ret;
    }

    (void)opal_atomic_add_64(&module->opcount, 1);

    /* Every variant fetches into result_addr, so resolve it once. */
    ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
    result_md_offset = ((char*) result_addr - (char*) result_md_base);

    if (MPI_NO_OP == op) {
        /* Pure fetch: the origin buffer is deliberately never touched. */
        ret = PtlGet(result_md_h,
                     result_md_offset,
                     length,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     NULL);
    } else {
        ptl_handle_md_t origin_md_h;
        void *origin_md_base;
        ptl_size_t origin_md_offset;

        ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
        origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);

        if (MPI_REPLACE == op) {
            ret = PtlSwap(result_md_h,
                          result_md_offset,
                          origin_md_h,
                          origin_md_offset,
                          length,
                          peer,
                          module->pt_idx,
                          module->match_bits,
                          offset,
                          NULL,
                          0,
                          NULL,
                          PTL_SWAP,
                          ptl_dt);
        } else {
            ret = PtlFetchAtomic(result_md_h,
                                 result_md_offset,
                                 origin_md_h,
                                 origin_md_offset,
                                 length,
                                 peer,
                                 module->pt_idx,
                                 module->match_bits,
                                 offset,
                                 NULL,
                                 0,
                                 ptl_op,
                                 ptl_dt);
        }
    }

    if (OMPI_SUCCESS != ret) {
        /* Nothing was issued: undo the count taken above. */
        (void)opal_atomic_add_64(&module->opcount, -1);
        return ret;
    }

    return OMPI_SUCCESS;
}
예제 #12
0
/*
 * Begin a flow-control recovery epoch.
 *
 * Marks flow control as active, bumps the epoch counter, re-arms the alarm
 * and barrier for the new epoch, waits until every send slot has been
 * handed back, drains the event queue, sorts the pending sends with
 * seqnum_compare, and finally sends the fan-in message that enters the
 * recovery barrier.
 *
 * Returns OMPI_SUCCESS once the fan-in message has been sent, or the
 * error reported by setup_alarm(), setup_barrier() or opal_list_sort().
 * A PtlPut() failure is returned as the raw Portals return code.
 */
static int
start_recover(void)
{
    int ret;
    int64_t epoch_counter;

    ompi_mtl_portals4.flowctl.flowctl_active = true;
    epoch_counter = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1);

    /* "%ld" expects a long; int64_t is not long on every platform, so
       convert explicitly instead of relying on the types coinciding. */
    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                        "Entering flowctl_start_recover %ld",
                        (long) epoch_counter);

    /* re-arm trigger/alarm for next time */
    ret = setup_alarm(epoch_counter);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d setup_alarm failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    /* setup barrier tree for getting us out of flow control */
    ret = setup_barrier(epoch_counter);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d setup_barrier failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    /* drain all pending sends: spin on progress until the number of free
       send slots is back at its maximum */
    while (ompi_mtl_portals4.flowctl.send_slots != 
           ompi_mtl_portals4.flowctl.max_send_slots) {
        opal_progress();
    }

    /* drain event queue */
    while (0 != ompi_mtl_portals4_progress()) { ; }

    /* check short block active count.  A failure here is only logged and
       recovery continues.  NOTE(review): looks like deliberate
       best-effort -- confirm before turning it into a hard error. */
    ret = ompi_mtl_portals4_recv_short_link(1);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: recv_short_link failed: %d",
                            __FILE__, __LINE__, ret);
    }

    /* reorder the pending sends by operation count */
    ret = opal_list_sort(&ompi_mtl_portals4.flowctl.pending_sends, seqnum_compare);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d opal_list_sort failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    /* drain event queue again, just to make sure */
    while (0 != ompi_mtl_portals4_progress()) { ; }

    /* send barrier entry message: a zero-length put with no ack requested */
    ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                 0,
                 0,
                 PTL_NO_ACK_REQ,
                 ompi_mtl_portals4.flowctl.me,
                 ompi_mtl_portals4.flowctl_idx,
                 MTL_PORTALS4_FLOWCTL_FANIN,
                 0,
                 NULL,
                 0);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPut failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* recovery complete when fan-out event arrives, async event, so
       we're done now */
    ret = OMPI_SUCCESS;

 error:
    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Exiting flowctl_start_recover %ld",
                         (long) epoch_counter));

    return ret;
}