コード例 #1
0
ファイル: osc_pt2pt_comm.c プロジェクト: eddy16112/ompi
static int
ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count,
                                struct ompi_datatype_t *origin_dt,
                                int target, OPAL_PTRDIFF_TYPE target_disp,
                                int target_count,
                                struct ompi_datatype_t *target_dt,
                                struct ompi_op_t *op, ompi_win_t *win,
                                ompi_osc_pt2pt_request_t *request)
{
    int ret;
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, target);
    bool is_long_datatype = false;
    bool is_long_msg = false;
    ompi_osc_pt2pt_frag_t *frag;
    ompi_osc_pt2pt_header_acc_t *header;
    ompi_osc_pt2pt_sync_t *pt2pt_sync;
    size_t ddt_len, payload_len, frag_len;
    char *ptr;
    const void *packed_ddt;
    int tag = -1;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "acc: 0x%lx, %d, %s, %d, %d, %d, %s, %s, %s",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name, op->o_name,
                         win->w_name));

    pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target, NULL);
    if (OPAL_UNLIKELY(NULL == pt2pt_sync)) {
        return OMPI_ERR_RMA_SYNC;
    }

    /* short-circuit case */
    if (0 == origin_count || 0 == target_count) {
        if (request) {
            ompi_osc_pt2pt_request_complete (request, MPI_SUCCESS);
        }

        return OMPI_SUCCESS;
    }

    /* optimize the self case. TODO: optimize the local case */
    if (ompi_comm_rank (module->comm) == target) {
        return ompi_osc_pt2pt_acc_self (pt2pt_sync, origin_addr, origin_count, origin_dt,
                                        target_disp, target_count, target_dt,
                                        op, module, request);
    }

    /* Compute datatype and payload lengths.  Note that the datatype description
     * must fit in a single frag */
    ddt_len = ompi_datatype_pack_description_length(target_dt);
    payload_len = origin_dt->super.size * origin_count;

    frag_len = sizeof(*header) + ddt_len + payload_len;
    ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, false, true);
    if (OMPI_SUCCESS != ret) {
        frag_len = sizeof(*header) + ddt_len;
        ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, !request);
        if (OMPI_SUCCESS != ret) {
            /* allocate space for the header plus space to store ddt_len */
            frag_len = sizeof(*header) + 8;
            ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr, true, !request);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }

            is_long_datatype = true;
         }

        is_long_msg = true;
        tag = get_tag (module);
    } else {
        /* still need to set the tag for the active/passive logic on the target */
        tag = !!(module->passive_target_access_epoch);
    }

    if (is_long_msg) {
        /* wait for synchronization before posting a long message */
        if (pt2pt_sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK) {
            OPAL_THREAD_LOCK(&pt2pt_sync->lock);
            ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target);
            while (!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER)) {
                opal_condition_wait(&pt2pt_sync->cond, &pt2pt_sync->lock);
            }
            OPAL_THREAD_UNLOCK(&pt2pt_sync->lock);
        } else {
            ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);
        }
    }

    header = (ompi_osc_pt2pt_header_acc_t*) ptr;
    header->base.flags = 0;
    header->len = frag_len;
    header->count = target_count;
    header->displacement = target_disp;
    header->op = op->o_f_to_c_index;
    header->tag = tag;
    ptr += sizeof (*header);

    do {
        ret = ompi_datatype_get_pack_description(target_dt, &packed_ddt);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            break;
        }

        if (is_long_datatype) {
            /* the datatype does not fit in an eager message. send it seperately */
            header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE;

            OMPI_DATATYPE_RETAIN(target_dt);

            ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE,
                                            target, tag_to_target(tag), module->comm,
                                            ompi_osc_pt2pt_dt_send_complete, target_dt);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                break;
            }

            *((uint64_t *) ptr) = ddt_len;
            ptr += 8;
        } else {
            memcpy((unsigned char*) ptr, packed_ddt, ddt_len);
            ptr += ddt_len;
        }

        if (!is_long_msg) {
            header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC;
            osc_pt2pt_hton(header, proc);

            osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc,
                                    origin_count, origin_dt);

            /* the user's buffer is no longer needed so mark the request as
             * complete. */
            if (request) {
                ompi_osc_pt2pt_request_complete (request, MPI_SUCCESS);
            }
        } else {
            header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG;
            osc_pt2pt_hton(header, proc);

            OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                                 "acc: starting long accumulate with tag %d", tag));

            ret = ompi_osc_pt2pt_data_isend (module, origin_addr, origin_count, origin_dt,
                                            target, tag_to_target(tag), request);
        }
    } while (0);

    if (OMPI_SUCCESS != ret) {
        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                             "acc: failed with eror %d", ret));
    } else {
        /* mark the fragment as valid */
        header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_VALID;
    }

    return ompi_osc_pt2pt_frag_finish(module, frag);
}
コード例 #2
0
ファイル: osc_pt2pt_comm.c プロジェクト: situspanesse/ompi
static int
ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count,
                                struct ompi_datatype_t *origin_dt,
                                int target, OPAL_PTRDIFF_TYPE target_disp,
                                int target_count,
                                struct ompi_datatype_t *target_dt,
                                struct ompi_op_t *op, ompi_win_t *win,
                                ompi_osc_pt2pt_request_t *request)
{
    int ret;
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, target);
    bool is_long_datatype = false;
    bool is_long_msg = false;
    ompi_osc_pt2pt_frag_t *frag;
    ompi_osc_pt2pt_header_acc_t *header;
    ompi_osc_pt2pt_sync_t *pt2pt_sync;
    size_t ddt_len, payload_len, frag_len;
    char *ptr;
    const void *packed_ddt;
    int tag = -1;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "acc: 0x%lx, %d, %s, %d, %d, %d, %s, %s, %s",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name, op->o_name,
                         win->w_name));

    pt2pt_sync = ompi_osc_pt2pt_module_sync_lookup (module, target, NULL);
    if (OPAL_UNLIKELY(NULL == pt2pt_sync)) {
        return OMPI_ERR_RMA_SYNC;
    }

    /* short-circuit case */
    if (0 == origin_count || 0 == target_count) {
        if (request) {
            ompi_osc_pt2pt_request_complete (request, MPI_SUCCESS);
        }

        return OMPI_SUCCESS;
    }

    /* optimize the self case. TODO: optimize the local case */
    if (ompi_comm_rank (module->comm) == target) {
        return ompi_osc_pt2pt_acc_self (pt2pt_sync, origin_addr, origin_count, origin_dt,
                                        target_disp, target_count, target_dt,
                                        op, module, request);
    }

    /* Compute datatype and payload lengths.  Note that the datatype description
     * must fit in a single frag */
    ddt_len = ompi_datatype_pack_description_length(target_dt);
    payload_len = origin_dt->super.size * origin_count;

    frag_len = sizeof(*header) + ddt_len + payload_len;
    ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr);
    if (OMPI_SUCCESS != ret) {
        frag_len = sizeof(*header) + ddt_len;
        ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr);
        if (OMPI_SUCCESS != ret) {
            /* allocate space for the header plus space to store ddt_len */
            frag_len = sizeof(*header) + 8;
            ret = ompi_osc_pt2pt_frag_alloc(module, target, frag_len, &frag, &ptr);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }

            is_long_datatype = true;
         }

        is_long_msg = true;
        tag = get_rtag (module);
    }

    /* flush will be called at the end of this function. make sure all post messages have
     * arrived. */
    if ((is_long_msg || request) && OMPI_OSC_PT2PT_SYNC_TYPE_PSCW == pt2pt_sync->type) {
        ompi_osc_pt2pt_sync_wait (pt2pt_sync);
    }

    header = (ompi_osc_pt2pt_header_acc_t*) ptr;
    header->base.flags = 0;
    header->len = frag_len;
    header->count = target_count;
    header->displacement = target_disp;
    header->op = op->o_f_to_c_index;
    ptr += sizeof (*header);

    do {
        ret = ompi_datatype_get_pack_description(target_dt, &packed_ddt);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            break;
        }

        if (is_long_datatype) {
            /* the datatype does not fit in an eager message. send it seperately */
            header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE;

            OBJ_RETAIN(target_dt);

            ret = ompi_osc_pt2pt_isend_w_cb ((void *) packed_ddt, ddt_len, MPI_BYTE, target,
                                            tag, module->comm, ompi_osc_pt2pt_dt_send_complete,
                                            target_dt);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                break;
            }

            *((uint64_t *) ptr) = ddt_len;
            ptr += 8;
        } else {
            memcpy((unsigned char*) ptr, packed_ddt, ddt_len);
            ptr += ddt_len;
        }

        if (!is_long_msg) {
            header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC;
            osc_pt2pt_hton(header, proc);

            osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc,
                                    origin_count, origin_dt);

            /* the user's buffer is no longer needed so mark the request as
             * complete. */
            if (request) {
                ompi_osc_pt2pt_request_complete (request, MPI_SUCCESS);
            }
        } else {
            header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG;
            header->tag = tag;
            osc_pt2pt_hton(header, proc);

            OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                                 "acc: starting long accumulate with tag %d", tag));

            /* increment the outgoing send count */
            ompi_osc_signal_outgoing (module, target, 1);

            if (request) {
                request->outstanding_requests = 1;
                ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_dt,
                                                target, tag, module->comm, ompi_osc_pt2pt_req_comm_complete,
                                                request);
            } else {
                ret = ompi_osc_pt2pt_component_isend (module, origin_addr, origin_count, origin_dt, target, tag,
                                                     module->comm);
            }
        }
    } while (0);

    if (OMPI_SUCCESS != ret) {
        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
                             "acc: failed with eror %d", ret));
    } else {
        /* mark the fragment as valid */
        header->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_VALID;
    }

    ret = ompi_osc_pt2pt_frag_finish(module, frag);

    if (is_long_msg || request) {
        /* need to flush now in case the caller decides to wait on the request */
        ompi_osc_pt2pt_frag_flush_target (module, target);
    }

    return ret;
}