Example #1
int
ompi_osc_sm_fence(int assert, struct ompi_win_t *win)
{
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;

    /* ensure all memory operations have completed */
    opal_atomic_mb();

    if (module->global_state->use_barrier_for_fence) {
        return module->comm->c_coll->coll_barrier(module->comm,
                                                 module->comm->c_coll->coll_barrier_module);
    } else {
        module->my_sense = !module->my_sense;
        pthread_mutex_lock(&module->global_state->mtx);
        module->global_state->count--;
        if (module->global_state->count == 0) {
            module->global_state->count = ompi_comm_size(module->comm);
            module->global_state->sense = module->my_sense;
            pthread_cond_broadcast(&module->global_state->cond);
        } else {
            while (module->global_state->sense != module->my_sense) {
                pthread_cond_wait(&module->global_state->cond, &module->global_state->mtx);
            }
        }
        pthread_mutex_unlock(&module->global_state->mtx);

        return OMPI_SUCCESS;
    }
}
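The non-collective branch above is a classic sense-reversing barrier built on a process-shared mutex and condition variable. The fragment below is a minimal standalone sketch of the same pattern, given only for illustration; the names sr_barrier_t and sr_barrier_wait are hypothetical and not part of Open MPI.

#include <pthread.h>
#include <stdbool.h>

/* Hypothetical sense-reversing barrier mirroring the fallback path of
 * ompi_osc_sm_fence(): the last arrival resets the counter and flips the
 * shared sense; everyone else waits until the flip becomes visible. */
typedef struct {
    pthread_mutex_t mtx;
    pthread_cond_t  cond;
    int             count;    /* arrivals still missing this round */
    int             nprocs;   /* total number of participants */
    bool            sense;    /* flips once per completed round */
} sr_barrier_t;

static void sr_barrier_wait(sr_barrier_t *b, bool *my_sense)
{
    *my_sense = !*my_sense;               /* flip the caller's local sense */
    pthread_mutex_lock(&b->mtx);
    if (0 == --b->count) {                /* last one in: release the round */
        b->count = b->nprocs;
        b->sense = *my_sense;
        pthread_cond_broadcast(&b->cond);
    } else {
        while (b->sense != *my_sense) {   /* wait for the sense to flip */
            pthread_cond_wait(&b->cond, &b->mtx);
        }
    }
    pthread_mutex_unlock(&b->mtx);
}

Flipping a per-caller sense value lets consecutive fence epochs reuse the same shared counter without one round racing into the next.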
Example #2
int
ompi_osc_sm_flush_local_all(struct ompi_win_t *win)
{
    opal_atomic_mb();

    return OMPI_SUCCESS;
}
Example #3
int
ompi_osc_sm_sync(struct ompi_win_t *win)
{
    opal_atomic_mb();

    return OMPI_SUCCESS;
}
Example #4
static inline uint32_t
lk_fetch32(ompi_osc_sm_module_t *module,
           int target,
           size_t offset)
{
    opal_atomic_mb ();
    return *((uint32_t *)((char*) &module->node_states[target].lock + offset));
}
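lk_fetch32 issues a full memory barrier and then reads the 32-bit field that sits offset bytes into the target's shared lock structure. The sketch below shows how such an offset is typically produced and consumed; demo_lock_t and its fields are hypothetical stand-ins, not the actual ompi_osc_sm lock layout.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical shared lock layout, for illustration only. */
typedef struct {
    uint32_t counter;   /* e.g. next ticket to hand out */
    uint32_t write;     /* e.g. ticket currently allowed to write */
} demo_lock_t;

/* Same pointer arithmetic as lk_fetch32(): step 'offset' bytes into the
 * structure, then reinterpret that location as a 32-bit value. */
static inline uint32_t demo_fetch32(const demo_lock_t *lock, size_t offset)
{
    return *(const uint32_t *)((const char *) lock + offset);
}

/* A caller would pass offsetof(demo_lock_t, counter) or
 * offsetof(demo_lock_t, write) as the offset argument. */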
Example #5
int
ompi_osc_sm_flush(int target,
                        struct ompi_win_t *win)
{
    opal_atomic_mb();

    return OMPI_SUCCESS;
}
Example #6
int
ompi_osc_sm_start(struct ompi_group_t *group,
                  int assert,
                  struct ompi_win_t *win)
{
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    int my_rank = ompi_comm_rank (module->comm);
    void *_tmp_ptr = NULL;

    OBJ_RETAIN(group);

    if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, (void *) &_tmp_ptr, group)) {
        OBJ_RELEASE(group);
        return OMPI_ERR_RMA_SYNC;
    }

    if (0 == (assert & MPI_MODE_NOCHECK)) {
        int size;

        int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
        if (NULL == ranks) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        size = ompi_group_size(module->start_group);

        for (int i = 0 ; i < size ; ++i) {
            int rank_byte = ranks[i] >> OSC_SM_POST_BITS;
            osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f);

            /* wait for rank to post */
            while (!(module->posts[my_rank][rank_byte] & rank_bit)) {
                opal_progress();
                opal_atomic_mb();
            }

            opal_atomic_rmb ();

#if OPAL_HAVE_ATOMIC_MATH_64
            (void) opal_atomic_fetch_xor_64 ((volatile int64_t *) module->posts[my_rank] + rank_byte, rank_bit);
#else
            (void) opal_atomic_fetch_xor_32 ((volatile int32_t *) module->posts[my_rank] + rank_byte, rank_bit);
#endif
        }

        free (ranks);
    }

    return OMPI_SUCCESS;
}
Example #7
int
ompi_osc_sm_unlock(int target,
                   struct ompi_win_t *win)
{
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    int ret;

    /* ensure all memory operations have completed */
    opal_atomic_mb();

    switch (module->outstanding_locks[target]) {
    case lock_none:
        return OMPI_ERR_RMA_SYNC;

    case lock_nocheck:
        ret = OMPI_SUCCESS;
        break;

    case lock_exclusive:
        ret = end_exclusive(module, target);
        break;

    case lock_shared:
        ret = end_shared(module, target);
        break;

    default:
        // This is an OMPI programming error -- cause some pain.
        assert(module->outstanding_locks[target] == lock_none ||
               module->outstanding_locks[target] == lock_nocheck ||
               module->outstanding_locks[target] == lock_exclusive ||
               module->outstanding_locks[target] == lock_shared);

         // In non-developer builds, assert() will be a no-op, so
         // ensure the error gets reported
        opal_output(0, "Unknown lock type in ompi_osc_sm_unlock -- this is an OMPI programming error");
        ret = OMPI_ERR_BAD_PARAM;
        break;
    }

    module->outstanding_locks[target] = lock_none;

    return ret;
}
Example #8
int
ompi_osc_sm_start(struct ompi_group_t *group,
                  int assert,
                  struct ompi_win_t *win)
{
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    int my_rank = ompi_comm_rank (module->comm);

    OBJ_RETAIN(group);

    if (!OPAL_ATOMIC_CMPSET_PTR(&module->start_group, NULL, group)) {
        OBJ_RELEASE(group);
        return OMPI_ERR_RMA_SYNC;
    }

    if (0 == (assert & MPI_MODE_NOCHECK)) {
        int size;

        int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
        if (NULL == ranks) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        size = ompi_group_size(module->start_group);

        for (int i = 0 ; i < size ; ++i) {
            int rank_byte = ranks[i] >> 6;
            uint64_t old, rank_bit = ((uint64_t) 1) << (ranks[i] & 0x3f);

            /* wait for rank to post */
            while (!(module->posts[my_rank][rank_byte] & rank_bit)) {
                opal_progress();
                opal_atomic_mb();
            }

            opal_atomic_rmb ();

            do {
                old = module->posts[my_rank][rank_byte];
            } while (!opal_atomic_cmpset_64 ((int64_t *) module->posts[my_rank] + rank_byte, old, old ^ rank_bit));
        }

        free (ranks);
    }

    return OMPI_SUCCESS;
}
Example #9
int ompi_osc_ucx_sync(struct ompi_win_t *win) {
    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module;
    ucs_status_t status;

    if (module->epoch_type.access != PASSIVE_EPOCH &&
        module->epoch_type.access != PASSIVE_ALL_EPOCH) {
        return OMPI_ERR_RMA_SYNC;
    }

    opal_atomic_mb();

    status = ucp_worker_fence(mca_osc_ucx_component.ucp_worker);
    if (status != UCS_OK) {
        OSC_UCX_VERBOSE(1, "ucp_worker_fence failed: %d", status);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
Example #10
/* Non-blocking test for the request status. Upon completion, the request will
 * not be freed (unlike the test function). A subsequent call to test, wait
 * or free should be executed on the request.
 */
int MPI_Request_get_status(MPI_Request request, int *flag,
                           MPI_Status *status) 
{
    if( MPI_PARAM_CHECK ) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        if( (NULL == flag) || (NULL == status) ) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
        }
    }

    opal_atomic_mb();
    if( (request == MPI_REQUEST_NULL) || (request->req_state == OMPI_REQUEST_INACTIVE) ) {
        *flag = true;
        if( MPI_STATUS_IGNORE != status ) {
            *status = ompi_status_empty;
        }
        return MPI_SUCCESS;
    }
    if( request->req_complete ) { 
        *flag = true; 
        /* If this is a generalized request, we *always* have to call
           the query function to get the status (MPI-2:8.2), even if
           the user passed STATUS_IGNORE. */
        if (OMPI_REQUEST_GEN == request->req_type) {
            ompi_grequest_invoke_query(request, &request->req_status);
        }
        if (MPI_STATUS_IGNORE != status) {
            *status = request->req_status;
        }
        return MPI_SUCCESS;
    }
    *flag = false;
#if OMPI_ENABLE_PROGRESS_THREADS == 0
    opal_progress();
#endif
    return MPI_SUCCESS;
}
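MPI_Request_get_status only peeks at completion; it never frees or deactivates the request, so a caller that sees the flag set still has to finish the request with MPI_Test, MPI_Wait, or MPI_Request_free. A minimal usage sketch follows (hypothetical buffer and tag, error checking omitted):

#include <mpi.h>

/* Poll a nonblocking receive without consuming the request, then
 * complete it with MPI_Wait once it has finished. */
static void demo_poll_then_wait(void)
{
    int buf = 0, flag = 0;
    MPI_Request req;
    MPI_Status status;

    MPI_Irecv(&buf, 1, MPI_INT, MPI_ANY_SOURCE, /* tag */ 0,
              MPI_COMM_WORLD, &req);

    while (!flag) {
        /* Does not free 'req', even once flag becomes true. */
        MPI_Request_get_status(req, &flag, &status);
        /* ... other useful work could go here ... */
    }

    /* The request is still live; a completion call is required. */
    MPI_Wait(&req, MPI_STATUS_IGNORE);
}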
Example #11
int ompi_request_default_test_any(
    size_t count,
    ompi_request_t ** requests,
    int *index,
    int *completed,
    ompi_status_public_t * status)
{
    size_t i;
    size_t num_requests_null_inactive = 0;
    ompi_request_t **rptr;
    ompi_request_t *request;

    opal_atomic_mb();
    rptr = requests;
    for (i = 0; i < count; i++, rptr++) {
        request = *rptr;
        if( request->req_state == OMPI_REQUEST_INACTIVE ) {
            num_requests_null_inactive++;
            continue;
        }

        if( request->req_complete ) {
            OMPI_CRCP_REQUEST_COMPLETE(request);

            *index = i;
            *completed = true;
            /* MPI 2:8.2 says that generalized requests always have
               the query function invoked in TEST* / WAIT*
               (#@$%@#$%!!! Would have been simpler to call it in
               GREQUEST_COMPLETE!), even if the user passed in
               STATUS_IGNORE */
            if (OMPI_REQUEST_GEN == request->req_type) {
                ompi_grequest_invoke_query(request, &request->req_status);
                if (MPI_STATUS_IGNORE != status) {
                    /* Do *NOT* set a new value for status->MPI_ERROR
                       here!  See MPI-1.1 doc, sec 3.2.5, p.22 */
                    int old_error = status->MPI_ERROR;
                    *status = request->req_status;
                    status->MPI_ERROR = old_error;
                }
            } else if (MPI_STATUS_IGNORE != status) {
                /* Do *NOT* set a new value for status->MPI_ERROR
                   here!  See MPI-1.1 doc, sec 3.2.5, p.22 */
                int old_error = status->MPI_ERROR;
                *status = request->req_status;
                status->MPI_ERROR = old_error;
            }

            if( request->req_persistent ) {
                request->req_state = OMPI_REQUEST_INACTIVE;
                return OMPI_SUCCESS;
            }
            /* If there is an error on the request, don't free it */
            if (MPI_SUCCESS != request->req_status.MPI_ERROR) {
                return request->req_status.MPI_ERROR;
            }
            /* If there's an error while freeing the request, assume
               that the request is still there.  Otherwise, Bad Things
               will happen later! */
            return ompi_request_free(rptr);
        }
    }

    /* Only fall through here if we found nothing */
    *index = MPI_UNDEFINED;
    if(num_requests_null_inactive != count) {
        *completed = false;
#if OMPI_ENABLE_PROGRESS_THREADS == 0
        opal_progress();
#endif
    } else {
        *completed = true;
        if (MPI_STATUS_IGNORE != status) {
            *status = ompi_status_empty;
        }
    }
    return OMPI_SUCCESS;
}
Example #12
int ompi_request_default_test( ompi_request_t ** rptr,
                       int *completed,
                       ompi_status_public_t * status )
{
    ompi_request_t *request = *rptr;
#if OMPI_ENABLE_PROGRESS_THREADS == 0
    int do_it_once = 0;

 recheck_request_status:
#endif
    opal_atomic_mb();
    if( request->req_state == OMPI_REQUEST_INACTIVE ) {
        *completed = true;
        if (MPI_STATUS_IGNORE != status) {
            *status = ompi_status_empty;
        }
        return OMPI_SUCCESS;
    }

    if (request->req_complete) {
        OMPI_CRCP_REQUEST_COMPLETE(request);

        *completed = true;
        /* For a generalized request, we *have* to call the query_fn
           if it completes, even if the user provided
           STATUS_IGNORE.  See MPI-2:8.2. */
        if (OMPI_REQUEST_GEN == request->req_type) {
            ompi_grequest_invoke_query(request, &request->req_status);
            if (MPI_STATUS_IGNORE != status) {
                int old_error = status->MPI_ERROR;
                *status = request->req_status;
                status->MPI_ERROR = old_error;
            }
        } else if (MPI_STATUS_IGNORE != status) {
            /* Do *NOT* set a new value for status->MPI_ERROR here!
               See MPI-1.1 doc, sec 3.2.5, p.22 */
            int old_error = status->MPI_ERROR;
            *status = request->req_status;
            status->MPI_ERROR = old_error;
        }
        if( request->req_persistent ) {
            request->req_state = OMPI_REQUEST_INACTIVE;
            return request->req_status.MPI_ERROR;
        }
        /* If there was an error, don't free the request -- just
           return the single error. */
        if (MPI_SUCCESS != request->req_status.MPI_ERROR) {
            return request->req_status.MPI_ERROR;
        }
        /* If there's an error on the request, assume that the request
           is still there.  Otherwise, Bad Things will happen
           later! */
        return ompi_request_free(rptr);
    }
#if OMPI_ENABLE_PROGRESS_THREADS == 0
    if( 0 == do_it_once ) {
        /**
         * If we run the opal_progress then check the status of the request before
         * leaving. We will call the opal_progress only once per call.
         */
        opal_progress();
        do_it_once++;
        goto recheck_request_status;
    }
#endif
    *completed = false;
    return OMPI_SUCCESS;
}
Example #13
int ompi_request_default_test_some(
    size_t count,
    ompi_request_t ** requests,
    int * outcount,
    int * indices,
    ompi_status_public_t * statuses)
{
    size_t i, num_requests_null_inactive=0, num_requests_done = 0;
    int rc = OMPI_SUCCESS;
    ompi_request_t **rptr;
    ompi_request_t *request;

    opal_atomic_mb();
    rptr = requests;
    for (i = 0; i < count; i++, rptr++) {
        request = *rptr;
        if (request->req_state == OMPI_REQUEST_INACTIVE) {
            num_requests_null_inactive++;
            continue;
        }
        if (true == request->req_complete) {
            OMPI_CRCP_REQUEST_COMPLETE(request);
            indices[num_requests_done++] = i;
        }
    }

    /*
     * If there are no active requests, no need to progress
     */
    if (num_requests_null_inactive == count) {
        *outcount = MPI_UNDEFINED;
        return OMPI_SUCCESS;
    }

    *outcount = num_requests_done;

    if (num_requests_done == 0) {
#if OMPI_ENABLE_PROGRESS_THREADS == 0
        opal_progress();
#endif
        return OMPI_SUCCESS;
    }

    /* fill out completion status and free request if required */
    for( i = 0; i < num_requests_done; i++) {
        request = requests[indices[i]];

        /* See note above: if a generalized request completes, we
           *have* to call the query fn, even if STATUSES_IGNORE
           was supplied */
        if (OMPI_REQUEST_GEN == request->req_type) {
            ompi_grequest_invoke_query(request, &request->req_status);
        }
        if (MPI_STATUSES_IGNORE != statuses) {
            statuses[i] = request->req_status;
        }

        if (MPI_SUCCESS != request->req_status.MPI_ERROR) {
            rc = MPI_ERR_IN_STATUS;
        }

        if( request->req_persistent ) {
            request->req_state = OMPI_REQUEST_INACTIVE;
        } else {
            /* Only free the request if there was no error */
            if (MPI_SUCCESS == request->req_status.MPI_ERROR) {
                int tmp;
                tmp = ompi_request_free(&(requests[indices[i]]));
                if (OMPI_SUCCESS != tmp) {
                    return tmp;
                }
            }
        }
    }

    return rc;
}
Example #14
int ompi_request_default_test_all(
    size_t count,
    ompi_request_t ** requests,
    int *completed,
    ompi_status_public_t * statuses)
{
    size_t i, rc;
    ompi_request_t **rptr;
    size_t num_completed = 0;
    ompi_request_t *request;

    opal_atomic_mb();
    rptr = requests;
    for (i = 0; i < count; i++, rptr++) {
        request = *rptr;

        if( request->req_state == OMPI_REQUEST_INACTIVE ||
            request->req_complete) {
            OMPI_CRCP_REQUEST_COMPLETE(request);
            num_completed++;
        }
    }

    if (num_completed != count) {
        *completed = false;
#if OMPI_ENABLE_PROGRESS_THREADS == 0
        opal_progress();
#endif
        return OMPI_SUCCESS;
    }

    rptr = requests;
    *completed = true;

    rc = MPI_SUCCESS;
    if (MPI_STATUSES_IGNORE != statuses) {
        /* fill out completion status and free request if required */
        for( i = 0; i < count; i++, rptr++ ) {
            request  = *rptr;
            /* If the request is OMPI_REQUEST_INACTIVE set the status
             * to ompi_status_empty.
             */
            if( request->req_state == OMPI_REQUEST_INACTIVE ) {
                statuses[i] = ompi_status_empty;
                continue;
            }
            if (OMPI_REQUEST_GEN == request->req_type) {
                ompi_grequest_invoke_query(request, &request->req_status);
            }
            statuses[i] = request->req_status;
            if( request->req_persistent ) {
                request->req_state = OMPI_REQUEST_INACTIVE;
                continue;
            }
            /* MPI-2:4.5.1 says that we can return MPI_ERR_IN_STATUS
               even if MPI_STATUSES_IGNORE was used.  Woot! */
            /* Only free the request if there was no error on it */
            if (MPI_SUCCESS == request->req_status.MPI_ERROR) {
                int tmp = ompi_request_free(rptr);
                if (tmp != OMPI_SUCCESS) {
                    return tmp;
                }
            } else {
                rc = MPI_ERR_IN_STATUS;
            }
        }
    } else {
        /* free request if required */
        for( i = 0; i < count; i++, rptr++ ) {
            request = *rptr;
            if( request->req_state == OMPI_REQUEST_INACTIVE) {
                continue;
            }
            /* See note above: if a generalized request completes, we
               *have* to call the query fn, even if STATUSES_IGNORE
               was supplied */
            if (OMPI_REQUEST_GEN == request->req_type) {
                ompi_grequest_invoke_query(request, &request->req_status);
            }
            if( request->req_persistent ) {
                request->req_state = OMPI_REQUEST_INACTIVE;
                continue;
            }
            /* Only free the request if there was no error */
            if (MPI_SUCCESS == request->req_status.MPI_ERROR) {
                int tmp = ompi_request_free(rptr);
                if (tmp != OMPI_SUCCESS) {
                    return tmp;
                }
            } else {
                rc = MPI_ERR_IN_STATUS;
            }
        }
    }

    return rc;
}
Example #15
int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    const int my_rank = ompi_comm_rank (module->comm);
    ompi_osc_rdma_peer_t *my_peer = ompi_osc_rdma_module_peer (module, my_rank);
    ompi_osc_rdma_region_t *region;
    osc_rdma_counter_t region_count;
    osc_rdma_counter_t region_id;
    void *bound;
    intptr_t page_size = getpagesize ();
    int region_index;
    int ret;

    if (module->flavor != MPI_WIN_FLAVOR_DYNAMIC) {
        return OMPI_ERR_RMA_FLAVOR;
    }

    if (0 == len) {
        /* short-circuit 0-byte case */
        return OMPI_SUCCESS;
    }

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach: %s, %p, %lu", win->w_name, base, (unsigned long) len);

    OPAL_THREAD_LOCK(&module->lock);

    region_count = module->state->region_count & 0xffffffffL;
    region_id    = module->state->region_count >> 32;

    if (region_count == mca_osc_rdma_component.max_attach) {
        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERR_RMA_ATTACH;
    }

    /* it is wasteful to register less than a page. this may allow the remote side to access more
     * memory, but the MPI standard covers this by calling that behavior erroneous */
    bound = (void *)OPAL_ALIGN((intptr_t) base + len, page_size, intptr_t);
    base = (void *)((intptr_t) base & ~(page_size - 1));
    len = (size_t)((intptr_t) bound - (intptr_t) base);

    /* see if a matching region already exists */
    region = ompi_osc_rdma_find_region_containing ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1, (intptr_t) base,
                                                   (intptr_t) bound, module->region_size, &region_index);
    if (NULL != region) {
        ++module->dynamic_handles[region_index].refcnt;
        OPAL_THREAD_UNLOCK(&module->lock);
        /* no need to invalidate remote caches */
        return OMPI_SUCCESS;
    }

    /* region is in flux */
    module->state->region_count = -1;
    opal_atomic_wmb ();

    ompi_osc_rdma_lock_acquire_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));

    /* do a binary search for where the region should be inserted */
    if (region_count) {
        region = find_insertion_point ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1, (intptr_t) base,
                                       module->region_size, &region_index);

        if (region_index < region_count) {
            memmove ((void *) ((intptr_t) region + module->region_size), region, (region_count - region_index) * module->region_size);

            if (module->selected_btl->btl_register_mem) {
                memmove (module->dynamic_handles + region_index + 1, module->dynamic_handles + region_index,
                         (region_count - region_index) * sizeof (module->dynamic_handles[0]));
            }
        }
    } else {
        region_index = 0;
        region = (ompi_osc_rdma_region_t *) module->state->regions;
    }

    region->base = (intptr_t) base;
    region->len  = len;

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "attaching dynamic memory region {%p, %p} at index %d",
                     base, (void *)((intptr_t) base + len), region_index);

    if (module->selected_btl->btl_register_mem) {
        mca_btl_base_registration_handle_t *handle;

        ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, (void *) region->base, region->len, MCA_BTL_REG_FLAG_ACCESS_ANY,
                                      &handle);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            OPAL_THREAD_UNLOCK(&module->lock);
            return OMPI_ERR_RMA_ATTACH;
        }

        memcpy (region->btl_handle_data, handle, module->selected_btl->btl_registration_handle_size);
        module->dynamic_handles[region_index].btl_handle = handle;
    } else {
        module->dynamic_handles[region_index].btl_handle = NULL;
    }

    module->dynamic_handles[region_index].refcnt = 1;

#if OPAL_ENABLE_DEBUG
    for (int i = 0 ; i < region_count + 1 ; ++i) {
        region = (ompi_osc_rdma_region_t *) ((intptr_t) module->state->regions + i * module->region_size);

        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, " dynamic region %d: {%p, %lu}", i,
                         (void *) region->base, (unsigned long) region->len);
    }
#endif

    opal_atomic_mb ();
    /* the region state has changed */
    module->state->region_count = ((region_id + 1) << 32) | (region_count + 1);

    ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
    OPAL_THREAD_UNLOCK(&module->lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach complete");

    return OMPI_SUCCESS;
}
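ompi_osc_rdma_attach keeps the attached-region bookkeeping in a single 64-bit word: the low 32 bits hold the region count, the high 32 bits hold a generation id that is bumped whenever the list changes, and the whole word is set to -1 while the list is being rewritten. A hedged sketch of that packing, with hypothetical helper names:

#include <stdint.h>

/* Pack/unpack helpers mirroring the (region_id << 32) | region_count
 * encoding used above. Names are illustrative, not Open MPI API. */
static inline uint64_t demo_pack_regions(uint32_t id, uint32_t count)
{
    return ((uint64_t) id << 32) | count;
}

static inline uint32_t demo_region_count(uint64_t packed)
{
    return (uint32_t) (packed & 0xffffffffUL);   /* low 32 bits */
}

static inline uint32_t demo_region_id(uint64_t packed)
{
    return (uint32_t) (packed >> 32);            /* high 32 bits */
}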
Example #16
int
ompi_osc_pt2pt_component_select(ompi_win_t *win,
                                ompi_info_t *info,
                                ompi_communicator_t *comm)
{
    ompi_osc_pt2pt_module_t *module = NULL;
    int ret, i;
    ompi_osc_pt2pt_buffer_t *buffer = NULL;
    opal_free_list_item_t *item = NULL;
    char *tmp = NULL;

    /* create module structure */
    module = (ompi_osc_pt2pt_module_t*)
        calloc(1, sizeof(ompi_osc_pt2pt_module_t));
    if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;

    /* fill in the function pointer part */
    memcpy(module, &ompi_osc_pt2pt_module_template, 
           sizeof(ompi_osc_base_module_t));

    /* initialize the p2p part */
    OBJ_CONSTRUCT(&(module->p2p_lock), opal_mutex_t);
    OBJ_CONSTRUCT(&(module->p2p_cond), opal_condition_t);
    OBJ_CONSTRUCT(&(module->p2p_acc_lock), opal_mutex_t);
    OBJ_CONSTRUCT(&module->p2p_pending_sendreqs, opal_list_t);
    OBJ_CONSTRUCT(&(module->p2p_copy_pending_sendreqs), opal_list_t);
    OBJ_CONSTRUCT(&(module->p2p_locks_pending), opal_list_t);
    OBJ_CONSTRUCT(&(module->p2p_unlocks_pending), opal_list_t);

    module->p2p_win = win;

    ret = ompi_comm_dup(comm, &(module->p2p_comm));
    if (ret != OMPI_SUCCESS) goto cleanup;

    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                        "pt2pt component creating window with id %d",
                        ompi_comm_get_cid(module->p2p_comm));

    asprintf(&tmp, "%d", ompi_comm_get_cid(module->p2p_comm));
    ompi_win_set_name(win, tmp);
    free(tmp);

    module->p2p_num_pending_sendreqs = (unsigned int*)
        malloc(sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
    if (NULL == module->p2p_num_pending_sendreqs) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }
    memset(module->p2p_num_pending_sendreqs, 0, 
           sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));

    module->p2p_num_pending_out = 0;
    module->p2p_num_pending_in = 0;
    module->p2p_num_post_msgs = 0;
    module->p2p_num_complete_msgs = 0;
    module->p2p_tag_counter = 0;

    module->p2p_copy_num_pending_sendreqs = (unsigned int*)
        malloc(sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
    if (NULL == module->p2p_copy_num_pending_sendreqs) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }
    memset(module->p2p_copy_num_pending_sendreqs, 0,
           sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));

    /* fence data */
    module->p2p_fence_coll_counts = (int*)
        malloc(sizeof(int) * ompi_comm_size(module->p2p_comm));
    if (NULL == module->p2p_fence_coll_counts) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }
    for (i = 0 ; i < ompi_comm_size(module->p2p_comm) ; ++i) {
        module->p2p_fence_coll_counts[i] = 1;
    }

    /* pwsc data */
    module->p2p_pw_group = NULL;
    module->p2p_sc_group = NULL;
    module->p2p_sc_remote_active_ranks = (bool*)
        malloc(sizeof(bool) * ompi_comm_size(module->p2p_comm));
    if (NULL == module->p2p_sc_remote_active_ranks) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }

    module->p2p_sc_remote_ranks = (int*)
        malloc(sizeof(int) * ompi_comm_size(module->p2p_comm));
    if (NULL == module->p2p_sc_remote_ranks) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }

    /* lock data */
    module->p2p_lock_status = 0;
    module->p2p_shared_count = 0;
    module->p2p_lock_received_ack = 0;

    /* fill in window information */
    win->w_osc_module = (ompi_osc_base_module_t*) module;

    /* sync memory - make sure all initialization completed */
    opal_atomic_mb();

    /* start up receive for protocol headers */
    OPAL_FREE_LIST_GET(&mca_osc_pt2pt_component.p2p_c_buffers,
                        item, ret);
    if (OMPI_SUCCESS != ret) goto cleanup;
    buffer = (ompi_osc_pt2pt_buffer_t*) item;
    buffer->data = (void*) module;

    ret = ompi_osc_pt2pt_component_irecv(buffer->payload,
                                         mca_osc_pt2pt_component.p2p_c_eager_size,
                                         MPI_BYTE,
                                         MPI_ANY_SOURCE,
                                         CONTROL_MSG_TAG,
                                         module->p2p_comm,
                                         &(buffer->request),
                                         component_fragment_cb,
                                         buffer);
    if (OMPI_SUCCESS != ret) goto cleanup;

    return OMPI_SUCCESS;

 cleanup:
    OBJ_DESTRUCT(&module->p2p_unlocks_pending);
    OBJ_DESTRUCT(&module->p2p_locks_pending);
    OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
    OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
    OBJ_DESTRUCT(&module->p2p_acc_lock);
    OBJ_DESTRUCT(&module->p2p_cond);
    OBJ_DESTRUCT(&module->p2p_lock);

    if (NULL != buffer) {
        OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_buffers, item);
    }
    if (NULL != module->p2p_sc_remote_ranks) {
        free(module->p2p_sc_remote_ranks);
    }
    if (NULL != module->p2p_sc_remote_active_ranks) {
        free(module->p2p_sc_remote_active_ranks);
    }
    if (NULL != module->p2p_fence_coll_counts) {
        free(module->p2p_fence_coll_counts);
    }
    if (NULL != module->p2p_copy_num_pending_sendreqs) {
        free(module->p2p_copy_num_pending_sendreqs);
    }
    if (NULL != module->p2p_num_pending_sendreqs) {
        free(module->p2p_num_pending_sendreqs);
    }
    if (NULL != module->p2p_comm) ompi_comm_free(&module->p2p_comm);

#if OPAL_ENABLE_DEBUG
    memset(module, 0, sizeof(ompi_osc_base_module_t));
#endif
    if (NULL != module) free(module);

    return ret;
}