Exemple #1
0
/*
 * clear_nacks - requeue every NACKed data operation of a target and mark
 * the target active again.
 *
 * target_id: Portals process id of the target whose operations are reset.
 *
 * Every PUT/GET on the target's data list that addresses target_id and is
 * in the NACKED state goes back to QUEUED; the target state is then set to
 * ACTIVE and poke_progress() is called.
 *
 * Returns PTL_OK on success, otherwise the code produced by find_target()
 * or poke_progress().
 */
static int clear_nacks(ptl_process_t target_id)
{
    struct rptl_target *tgt;
    struct rptl_op *cur;
    int ret = PTL_OK;
    MPIDI_STATE_DECL(MPID_STATE_CLEAR_NACKS);

    MPIDI_FUNC_ENTER(MPID_STATE_CLEAR_NACKS);

    ret = find_target(target_id, &tgt);
    RPTLU_ERR_POP(ret, "error finding target\n");

    cur = tgt->data_op_list;
    while (cur) {
        /* only NACKed operations are candidates for requeueing */
        if (cur->state == RPTL_OP_STATE_NACKED) {
            if (cur->op_type == RPTL_OP_PUT) {
                if (IDS_ARE_EQUAL(cur->u.put.target_id, target_id))
                    cur->state = RPTL_OP_STATE_QUEUED;
            }
            else if (cur->op_type == RPTL_OP_GET) {
                if (IDS_ARE_EQUAL(cur->u.get.target_id, target_id))
                    cur->state = RPTL_OP_STATE_QUEUED;
            }
        }
        cur = cur->next;
    }

    /* the target may receive data operations again */
    tgt->state = RPTL_TARGET_STATE_ACTIVE;

    ret = poke_progress();
    RPTLU_ERR_POP(ret, "error in poke_progress\n");

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_CLEAR_NACKS);
    return ret;

  fn_fail:
    goto fn_exit;
}
Exemple #2
0
/*
 * rptl_put - queue a PUT operation for a target and try to make progress.
 *
 * The first ten arguments are recorded verbatim in the operation so that
 * poke_progress() can later hand them to PtlPut(); pt_type selects whether
 * the operation is appended to the target's data or control queue.
 *
 * Returns PTL_OK on success, otherwise the code produced by find_target(),
 * rptli_op_alloc() or poke_progress().
 */
static int rptl_put(ptl_handle_md_t md_handle, ptl_size_t local_offset, ptl_size_t length,
                    ptl_ack_req_t ack_req, ptl_process_t target_id, ptl_pt_index_t pt_index,
                    ptl_match_bits_t match_bits, ptl_size_t remote_offset, void *user_ptr,
                    ptl_hdr_data_t hdr_data, enum rptl_pt_type pt_type)
{
    struct rptl_op *put_op;
    struct rptl_target *tgt;
    int ret = PTL_OK;
    MPIDI_STATE_DECL(MPID_STATE_RPTL_PUT);

    MPIDI_FUNC_ENTER(MPID_STATE_RPTL_PUT);

    ret = find_target(target_id, &tgt);
    RPTLU_ERR_POP(ret, "error finding target structure\n");

    ret = rptli_op_alloc(&put_op, tgt);
    RPTLU_ERR_POP(ret, "error allocating op\n");

    /* generic bookkeeping shared by every operation kind */
    put_op->op_type = RPTL_OP_PUT;
    put_op->state = RPTL_OP_STATE_QUEUED;
    put_op->events_ready = 0;
    put_op->target = tgt;

    /* local side of the transfer */
    put_op->u.put.md_handle = md_handle;
    put_op->u.put.local_offset = local_offset;
    put_op->u.put.length = length;
    put_op->u.put.ack_req = ack_req;
    put_op->u.put.user_ptr = user_ptr;

    /* remote side of the transfer */
    put_op->u.put.target_id = target_id;
    put_op->u.put.pt_index = pt_index;
    put_op->u.put.match_bits = match_bits;
    put_op->u.put.remote_offset = remote_offset;
    put_op->u.put.hdr_data = hdr_data;

    /* slots for the send and ack events; none recorded yet */
    put_op->u.put.send = NULL;
    put_op->u.put.ack = NULL;
    put_op->u.put.pt_type = pt_type;

    /* control traffic is queued separately from data traffic */
    if (pt_type != RPTL_PT_DATA) {
        MPL_DL_APPEND(tgt->control_op_list, put_op);
    }
    else {
        MPL_DL_APPEND(tgt->data_op_list, put_op);
    }

    ret = poke_progress();
    RPTLU_ERR_POP(ret, "Error from poke_progress\n");

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_RPTL_PUT);
    return ret;

  fn_fail:
    goto fn_exit;
}
Exemple #3
0
/*
 * MPID_nem_ptl_rptl_ptinit - create and register an rptl instance for a
 * data/control portal pair.
 *
 * ni_handle:  network interface handle stored in the new rptl.
 * eq_handle:  event queue handle stored in the new rptl and used for its MD.
 * data_pt:    portal table index recorded as the data portal.
 * control_pt: portal table index recorded as the control portal; when it is
 *             PTL_PT_ANY no control buffers are posted.
 *
 * NOTE(review): this listing ends before the function's closing brace, so
 * the exit/failure paths and the final return value are not visible here.
 */
int MPID_nem_ptl_rptl_ptinit(ptl_handle_ni_t ni_handle, ptl_handle_eq_t eq_handle, ptl_pt_index_t data_pt,
                             ptl_pt_index_t control_pt)
{
    int ret = PTL_OK;
    struct rptl *rptl;
    int mpi_errno = MPI_SUCCESS;
    int i;
    ptl_md_t md;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);


    /* setup the parts of rptls that can be done before world size or
     * target information */
    MPIU_CHKPMEM_MALLOC(rptl, struct rptl *, sizeof(struct rptl), mpi_errno, "rptl");
    MPL_DL_APPEND(rptl_info.rptl_list, rptl);

    /* a new rptl starts active with no pause acks collected */
    rptl->local_state = RPTL_LOCAL_STATE_ACTIVE;
    rptl->pause_ack_counter = 0;

    /* overflow-buffer accounting starts at zero */
    rptl->data.ob_max_count = 0;
    rptl->data.ob_curr_count = 0;

    rptl->data.pt = data_pt;
    rptl->control.pt = control_pt;

    rptl->ni = ni_handle;
    rptl->eq = eq_handle;

    /* bind one MD starting at address 0 with the maximum ptl_size_t length;
     * its events are delivered to this rptl's event queue */
    md.start = 0;
    md.length = (ptl_size_t) (-1);
    md.options = 0x0;
    md.eq_handle = rptl->eq;
    md.ct_handle = PTL_CT_NONE;
    ret = PtlMDBind(rptl->ni, &md, &rptl->md);
    RPTLU_ERR_POP(ret, "Error binding new global MD\n");

    /* post world_size number of empty buffers on the control portal */
    /* NOTE(review): the code below allocates and posts 2 * world_size
     * entries, not world_size as the comment above says */
    if (rptl->control.pt != PTL_PT_ANY) {
        MPIU_CHKPMEM_MALLOC(rptl->control.me, ptl_handle_me_t *,
                            2 * rptl_info.world_size * sizeof(ptl_handle_me_t), mpi_errno,
                            "rptl target info");
        for (i = 0; i < 2 * rptl_info.world_size; i++) {
            ret = rptli_post_control_buffer(rptl->ni, rptl->control.pt, &rptl->control.me[i]);
            RPTLU_ERR_POP(ret, "Error in rptli_post_control_buffer\n");
        }
        rptl->control.me_idx = 0;
    }
Exemple #4
0
/*
 * MPID_nem_ptl_rptl_ptfini - tear down the rptl registered for a data portal.
 *
 * pt_index: data portal index identifying the rptl to destroy.
 *
 * Unlinks every control match entry (when a control portal exists), frees
 * the handle array, removes the rptl from the global list and frees it.
 * Asserts that an rptl with the given data portal is registered.
 *
 * Returns PTL_OK on success, otherwise the failing PtlMEUnlink() code.
 */
int MPID_nem_ptl_rptl_ptfini(ptl_pt_index_t pt_index)
{
    int i;
    int ret = PTL_OK;
    struct rptl *rptl;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_PTFINI);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_PTFINI);

    /* linear search for the rptl that owns this data portal */
    rptl = rptl_info.rptl_list;
    while (rptl && rptl->data.pt != pt_index)
        rptl = rptl->next;
    assert(rptl);

    /* release the control-portal match entries, if any were posted */
    if (rptl->control.pt != PTL_PT_ANY) {
        i = 0;
        while (i < rptl_info.world_size * 2) {
            ret = PtlMEUnlink(rptl->control.me[i]);
            RPTLU_ERR_POP(ret, "Error unlinking control buffers\n");
            i++;
        }
        MPIU_Free(rptl->control.me);
    }

    /* unhook from the global list before releasing the memory */
    MPL_DL_DELETE(rptl_info.rptl_list, rptl);
    MPIU_Free(rptl);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_PTFINI);
    return ret;

  fn_fail:
    goto fn_exit;
}
Exemple #5
0
/*
 * MPID_nem_ptl_rptl_get - queue a GET operation for a target and try to
 * make progress.
 *
 * All arguments are recorded verbatim in the operation so that
 * poke_progress() can later hand them to PtlGet().  GETs always go on the
 * target's data queue.
 *
 * Returns PTL_OK on success, otherwise the code produced by find_target(),
 * rptli_op_alloc() or poke_progress().
 */
int MPID_nem_ptl_rptl_get(ptl_handle_md_t md_handle, ptl_size_t local_offset, ptl_size_t length,
                          ptl_process_t target_id, ptl_pt_index_t pt_index,
                          ptl_match_bits_t match_bits, ptl_size_t remote_offset, void *user_ptr)
{
    struct rptl_op *get_op;
    struct rptl_target *tgt;
    int ret = PTL_OK;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_GET);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_GET);

    ret = find_target(target_id, &tgt);
    RPTLU_ERR_POP(ret, "error finding target structure\n");

    ret = rptli_op_alloc(&get_op, tgt);
    RPTLU_ERR_POP(ret, "error allocating op\n");

    /* generic bookkeeping shared by every operation kind */
    get_op->op_type = RPTL_OP_GET;
    get_op->state = RPTL_OP_STATE_QUEUED;
    get_op->events_ready = 0;
    get_op->target = tgt;

    /* local side of the transfer */
    get_op->u.get.md_handle = md_handle;
    get_op->u.get.local_offset = local_offset;
    get_op->u.get.length = length;
    get_op->u.get.user_ptr = user_ptr;

    /* remote side of the transfer */
    get_op->u.get.target_id = target_id;
    get_op->u.get.pt_index = pt_index;
    get_op->u.get.match_bits = match_bits;
    get_op->u.get.remote_offset = remote_offset;

    MPL_DL_APPEND(tgt->data_op_list, get_op);

    ret = poke_progress();
    RPTLU_ERR_POP(ret, "Error from poke_progress\n");

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_GET);
    return ret;

  fn_fail:
    goto fn_exit;
}
Exemple #6
0
/*
 * send_pause_messages - queue a PAUSE control message to every other rank.
 *
 * rptl: the local rptl being paused; it must have a control portal.
 *
 * Sets the overflow-buffer threshold (ob_max_count) to half of the current
 * overflow count, then queues one zero-length control PUT carrying
 * RPTL_CONTROL_MSG_PAUSE to each rank except the local one.
 *
 * Returns PTL_OK on success, PTL_FAIL when target information cannot be
 * obtained, or the code produced by rptl_put().
 */
static int send_pause_messages(struct rptl *rptl)
{
    int rank, mpi_errno = MPI_SUCCESS;
    ptl_process_t peer;
    ptl_pt_index_t peer_data_pt, control_pt;
    int ret = PTL_OK;
    MPIDI_STATE_DECL(MPID_STATE_SEND_PAUSE_MESSAGES);

    MPIDI_FUNC_ENTER(MPID_STATE_SEND_PAUSE_MESSAGES);

    /* pausing is impossible without a control portal on this rptl */
    assert(rptl->control.pt != PTL_PT_ANY);

    /* threshold consulted before unpause messages are sent: half of
     * what currently sits in the overflow buffers */
    rptl->data.ob_max_count = rptl->data.ob_curr_count / 2;

    for (rank = 0; rank < rptl_info.world_size; rank++) {
        /* every rank but ourselves gets a pause message */
        if (rank != MPIDI_Process.my_pg_rank) {
            mpi_errno = rptl_info.get_target_info(rank, &peer, rptl->data.pt, &peer_data_pt, &control_pt);
            if (mpi_errno) {
                ret = PTL_FAIL;
                RPTLU_ERR_POP(ret, "Error getting target info while sending pause messages\n");
            }

            /* the peer must have a control portal as well */
            assert(control_pt != PTL_PT_ANY);

            ret = rptl_put(rptl->md, 0, 0, PTL_NO_ACK_REQ, peer, control_pt, 0, 0,
                           NULL, RPTL_CONTROL_MSG_PAUSE, RPTL_PT_CONTROL);
            RPTLU_ERR_POP(ret, "Error sending pause message\n");
        }
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_SEND_PAUSE_MESSAGES);
    return ret;

  fn_fail:
    goto fn_exit;
}
Exemple #7
0
/*
 * MPID_nem_ptl_rptl_drain_eq - wait for all queued operations to retire,
 * then free every target.
 *
 * eq_count: number of entries in eq.
 * eq:       event queues polled while operations are still outstanding.
 *
 * Phase 1 polls each event queue through MPID_nem_ptl_rptl_eqget() until
 * every target's control and data queues are empty; the events themselves
 * are discarded and PTL_EQ_EMPTY is treated as success.
 * Phase 2 frees each target together with its op pool segments and resets
 * the global target list so no dangling head pointer is left behind.
 *
 * Returns PTL_OK on success, otherwise the failing
 * MPID_nem_ptl_rptl_eqget() code (in which case nothing is freed).
 */
int MPID_nem_ptl_rptl_drain_eq(int eq_count, ptl_handle_eq_t *eq)
{
    int ret = PTL_OK;
    ptl_event_t event;
    struct rptl_op_pool_segment *op_segment;
    int i;
    struct rptl_target *target, *t;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_FINALIZE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_FINALIZE);

    for (target = rptl_info.target_list; target; target = target->next) {
        while (target->control_op_list || target->data_op_list) {
            for (i = 0; i < eq_count; i++) {
                /* read and ignore all events */
                ret = MPID_nem_ptl_rptl_eqget(eq[i], &event);
                if (ret == PTL_EQ_EMPTY)
                    ret = PTL_OK;
                RPTLU_ERR_POP(ret, "Error calling MPID_nem_ptl_rptl_eqget\n");
            }
        }
    }

    for (target = rptl_info.target_list; target;) {
        assert(target->data_op_list == NULL);
        assert(target->control_op_list == NULL);

        while (target->op_segment_list) {
            op_segment = target->op_segment_list;
            MPL_DL_DELETE(target->op_segment_list, op_segment);
            MPIU_Free(op_segment);
        }

        /* grab the successor before the node is released */
        t = target->next;
        MPIU_Free(target);
        target = t;
    }

    /* every node has been freed; clear the head so later traversals of
     * the target list do not walk freed memory */
    rptl_info.target_list = NULL;

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_FINALIZE);
    return ret;

  fn_fail:
    goto fn_exit;
}
Exemple #8
0
/*
 * get_event_info - map a Portals event to the rptl or operation it
 * belongs to.
 *
 * event:    the event to classify.
 * ret_rptl: set to the matching rptl for target-side events, NULL for
 *           origin-side events.
 * ret_op:   set to the originating operation for origin-side events
 *           (SEND, REPLY, ACK), NULL for target-side events.
 *
 * For origin-side events one origin event slot is returned to the pool,
 * the target's issued-op count is decremented for REPLY/ACK, and
 * poke_progress() is called to issue any pending operations.
 *
 * Returns PTL_OK on success, otherwise the poke_progress() code; on
 * failure the output pointers are left untouched.
 */
static int get_event_info(ptl_event_t * event, struct rptl **ret_rptl, struct rptl_op **ret_op)
{
    struct rptl *rptl;
    struct rptl_op *op;
    int ret = PTL_OK;
    MPIDI_STATE_DECL(MPID_STATE_GET_EVENT_INFO);

    MPIDI_FUNC_ENTER(MPID_STATE_GET_EVENT_INFO);

    if (event->type == PTL_EVENT_SEND || event->type == PTL_EVENT_REPLY ||
        event->type == PTL_EVENT_ACK) {
        /* origin-side events carry the op as their user pointer */
        op = (struct rptl_op *) event->user_ptr;

        /* validate the pointer before it is dereferenced below */
        assert(op);

        rptl_info.origin_events_left++;
        if (event->type != PTL_EVENT_SEND)
            op->target->issued_data_ops--;

        /* see if there are any pending ops to be issued */
        ret = poke_progress();
        RPTLU_ERR_POP(ret, "Error returned from poke_progress\n");

        rptl = NULL;
    }
    else {
        /* for all target-side events, we look up the rptl based on
         * the pt_index */
        for (rptl = rptl_info.rptl_list; rptl; rptl = rptl->next)
            if (rptl->data.pt == event->pt_index || rptl->control.pt == event->pt_index)
                break;

        assert(rptl);
        op = NULL;
    }

    *ret_rptl = rptl;
    *ret_op = op;

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_GET_EVENT_INFO);
    return ret;

  fn_fail:
    goto fn_exit;
}
Exemple #9
0
/*
 * rptli_post_control_buffer - append one empty, use-once match entry to
 * the priority list of a portal.
 *
 * ni_handle: network interface the entry is appended on.
 * pt:        portal table index that receives the entry.
 * me_handle: receives the handle of the appended entry.
 *
 * The entry is zero-length, accepts PUT and GET from any process and any
 * uid, and has link/unlink events disabled.  The append is retried for as
 * long as PtlMEAppend() reports PTL_NO_SPACE.
 *
 * Returns the final PtlMEAppend() result.
 */
int rptli_post_control_buffer(ptl_handle_ni_t ni_handle, ptl_pt_index_t pt,
                              ptl_handle_me_t * me_handle)
{
    int ret;
    ptl_me_t entry;
    ptl_process_t any_proc;
    MPIDI_STATE_DECL(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);

    MPIDI_FUNC_ENTER(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);

    /* wildcard initiator: match any node, any process */
    any_proc.phys.nid = PTL_NID_ANY;
    any_proc.phys.pid = PTL_PID_ANY;

    /* no backing memory */
    entry.start = NULL;
    entry.length = 0;
    entry.min_free = 0;

    /* matching criteria */
    entry.match_id = any_proc;
    entry.uid = PTL_UID_ANY;
    entry.match_bits = 0;
    entry.ignore_bits = 0;

    entry.ct_handle = PTL_CT_NONE;
    entry.options = (PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE |
                     PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE);

    /* keep retrying while the append fails for lack of space */
    do {
        ret = PtlMEAppend(ni_handle, pt, &entry, PTL_PRIORITY_LIST, NULL, me_handle);
    } while (ret == PTL_NO_SPACE);
    RPTLU_ERR_POP(ret, "Error appending empty buffer to priority list\n");

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);
    return ret;

  fn_fail:
    goto fn_exit;
}
Exemple #10
0
/*
 * poke_progress - advance the flow-control state machine and issue queued
 * operations.
 *
 * Two passes are made:
 *   1. Over local rptls: an rptl awaiting pause acks that has collected
 *      world_size - 1 acks and is at or below its overflow threshold gets
 *      its data portal re-enabled, becomes ACTIVE, and queues an UNPAUSE
 *      control message to every other rank.
 *   2. Over targets: a target in RECEIVED_PAUSE with no issued data ops
 *      gets a PAUSE_ACK queued; otherwise queued control PUTs are issued
 *      first and then, unless the target is DISABLED or PAUSE_ACKED,
 *      queued data PUTs/GETs are issued in list order.
 *
 * Issuing is throttled by rptl_info.origin_events_left (2 per PUT, 1 per
 * GET) and by PER_TARGET_THRESHOLD on target->issued_data_ops.
 *
 * NOTE(review): rptl_put() itself calls poke_progress(), so the control
 * messages queued here re-enter this function while the outer loops are
 * still running.
 *
 * Returns PTL_OK on success, PTL_FAIL when target information cannot be
 * obtained, or the failing code from PtlPTEnable(), rptl_put(), PtlPut()
 * or PtlGet().
 */
static int poke_progress(void)
{
    int ret = PTL_OK;
    struct rptl_target *target;
    struct rptl_op *op;
    struct rptl *rptl;
    int i;
    int mpi_errno = MPI_SUCCESS;
    ptl_process_t id;
    ptl_pt_index_t data_pt, control_pt;
    MPIDI_STATE_DECL(MPID_STATE_POKE_PROGRESS);

    MPIDI_FUNC_ENTER(MPID_STATE_POKE_PROGRESS);

    /* make progress on local RPTLs */
    for (rptl = rptl_info.rptl_list; rptl; rptl = rptl->next) {
        /* if the local state is active, there's nothing to do */
        if (rptl->local_state == RPTL_LOCAL_STATE_ACTIVE)
            continue;

        /* if we are in a local AWAITING PAUSE ACKS state, see if we
         * can send out the unpause message */
        if (rptl->local_state == RPTL_LOCAL_STATE_AWAITING_PAUSE_ACKS &&
            rptl->pause_ack_counter == rptl_info.world_size - 1) {
            /* if we are over the max count limit, do not send an
             * unpause message yet */
            if (rptl->data.ob_curr_count > rptl->data.ob_max_count)
                continue;

            ret = PtlPTEnable(rptl->ni, rptl->data.pt);
            RPTLU_ERR_POP(ret, "Error returned while reenabling PT\n");

            /* the state flips to ACTIVE before the unpause messages are
             * queued, so the re-entrant poke_progress() calls made by
             * rptl_put() skip this rptl */
            rptl->local_state = RPTL_LOCAL_STATE_ACTIVE;

            for (i = 0; i < rptl_info.world_size; i++) {
                if (i == MPIDI_Process.my_pg_rank)
                    continue;
                mpi_errno = rptl_info.get_target_info(i, &id, rptl->data.pt, &data_pt, &control_pt);
                if (mpi_errno) {
                    ret = PTL_FAIL;
                    RPTLU_ERR_POP(ret, "Error getting target info\n");
                }

                /* make sure the user setup a control portal */
                assert(control_pt != PTL_PT_ANY);

                ret = rptl_put(rptl->md, 0, 0, PTL_NO_ACK_REQ, id, control_pt,
                               0, 0, NULL, RPTL_CONTROL_MSG_UNPAUSE, RPTL_PT_CONTROL);
                RPTLU_ERR_POP(ret, "Error sending unpause message\n");
            }
        }
    }

    /* make progress on targets */
    for (target = rptl_info.target_list; target; target = target->next) {
        if (target->state == RPTL_TARGET_STATE_RECEIVED_PAUSE) {
            /* the pause ack is held back while any data op is still in
             * the ISSUED state; op is non-NULL after the loop iff one
             * was found */
            for (op = target->data_op_list; op; op = op->next)
                if (op->state == RPTL_OP_STATE_ISSUED)
                    break;
            if (op)
                continue;

            /* send a pause ack message */
            assert(target->rptl);
            for (i = 0; i < rptl_info.world_size; i++) {
                if (i == MPIDI_Process.my_pg_rank)
                    continue;
                /* find the target that has this target id and get the
                 * control portal information for it */
                mpi_errno = rptl_info.get_target_info(i, &id, target->rptl->data.pt, &data_pt, &control_pt);
                if (mpi_errno) {
                    ret = PTL_FAIL;
                    RPTLU_ERR_POP(ret, "Error getting target info\n");
                }
                if (IDS_ARE_EQUAL(id, target->id))
                    break;
            }

            /* NOTE(review): if the loop above never calls
             * get_target_info() (e.g. world_size == 1), id and control_pt
             * are read uninitialized below; if no rank matches, they hold
             * the values of the last rank queried */

            /* make sure the user setup a control portal */
            assert(control_pt != PTL_PT_ANY);

            /* the state is updated before the put so the re-entrant
             * poke_progress() call does not send a second ack */
            target->state = RPTL_TARGET_STATE_PAUSE_ACKED;

            ret = rptl_put(target->rptl->md, 0, 0, PTL_NO_ACK_REQ, id, control_pt, 0,
                           0, NULL, RPTL_CONTROL_MSG_PAUSE_ACK, RPTL_PT_CONTROL);
            RPTLU_ERR_POP(ret, "Error sending pause ack message\n");

            continue;
        }

        /* issue out all the control messages first */
        for (op = target->control_op_list; op; op = op->next) {
            assert(op->op_type == RPTL_OP_PUT);

            /* skip all the issued ops */
            if (op->state == RPTL_OP_STATE_ISSUED)
                continue;

            /* we should not get any NACKs on the control portal */
            assert(op->state != RPTL_OP_STATE_NACKED);

            if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                /* too few origin events left.  we can't issue this op
                 * or any following op to this target in order to
                 * maintain ordering */
                break;
            }

            /* a PUT reserves two origin event slots; control PUTs are
             * counted in issued_data_ops as well */
            rptl_info.origin_events_left -= 2;
            target->issued_data_ops++;

            /* force request for an ACK even if the user didn't ask
             * for it.  replace the user pointer with the OP id. */
            ret = PtlPut(op->u.put.md_handle, op->u.put.local_offset, op->u.put.length,
                         PTL_ACK_REQ, op->u.put.target_id, op->u.put.pt_index,
                         op->u.put.match_bits, op->u.put.remote_offset, op,
                         op->u.put.hdr_data);
            RPTLU_ERR_POP(ret, "Error issuing PUT\n");

            op->state = RPTL_OP_STATE_ISSUED;
        }

        /* data traffic is withheld from disabled and pause-acked targets */
        if (target->state == RPTL_TARGET_STATE_DISABLED || target->state == RPTL_TARGET_STATE_PAUSE_ACKED)
            continue;

        /* then issue out all the data messages */
        for (op = target->data_op_list; op; op = op->next) {
            if (op->op_type == RPTL_OP_PUT) {
                /* skip all the issued ops */
                if (op->state == RPTL_OP_STATE_ISSUED)
                    continue;

                /* if an op has been nacked, don't issue anything else
                 * to this target */
                if (op->state == RPTL_OP_STATE_NACKED)
                    break;

                if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                    /* too few origin events left.  we can't issue
                     * this op or any following op to this target in
                     * order to maintain ordering */
                    break;
                }

                /* a PUT reserves two origin event slots */
                rptl_info.origin_events_left -= 2;
                target->issued_data_ops++;

                /* force request for an ACK even if the user didn't
                 * ask for it.  replace the user pointer with the OP
                 * id. */
                ret = PtlPut(op->u.put.md_handle, op->u.put.local_offset, op->u.put.length,
                             PTL_ACK_REQ, op->u.put.target_id, op->u.put.pt_index,
                             op->u.put.match_bits, op->u.put.remote_offset, op,
                             op->u.put.hdr_data);
                RPTLU_ERR_POP(ret, "Error issuing PUT\n");
            }
            else if (op->op_type == RPTL_OP_GET) {
                /* skip all the issued ops */
                if (op->state == RPTL_OP_STATE_ISSUED)
                    continue;

                /* if an op has been nacked, don't issue anything else
                 * to this target */
                if (op->state == RPTL_OP_STATE_NACKED)
                    break;

                if (rptl_info.origin_events_left < 1 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                    /* too few origin events left.  we can't issue
                     * this op or any following op to this target in
                     * order to maintain ordering */
                    break;
                }

                /* a GET reserves a single origin event slot */
                rptl_info.origin_events_left--;
                target->issued_data_ops++;

                ret = PtlGet(op->u.get.md_handle, op->u.get.local_offset, op->u.get.length,
                             op->u.get.target_id, op->u.get.pt_index, op->u.get.match_bits,
                             op->u.get.remote_offset, op);
                RPTLU_ERR_POP(ret, "Error issuing GET\n");
            }

            /* reached only when the op was actually issued above (or has
             * an op_type other than PUT/GET) */
            op->state = RPTL_OP_STATE_ISSUED;
        }
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_POKE_PROGRESS);
    return ret;

  fn_fail:
    goto fn_exit;
}