Example #1
int
opal_progress_register(opal_progress_callback_t cb)
{
    int ret = OPAL_SUCCESS;
    size_t index;

    opal_atomic_lock(&progress_lock);

    /* see if we need to allocate more space */
    if (callbacks_len + 1 > callbacks_size) {
        opal_progress_callback_t *tmp;
        tmp = (opal_progress_callback_t*)realloc(callbacks, sizeof(opal_progress_callback_t) * (callbacks_size + 4));
        if (tmp == NULL) {
            ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE;
            goto cleanup;
        }
        /* registering fake callbacks to fill callbacks[] */
        for( index = callbacks_len + 1 ;  index < callbacks_size + 4 ; index++) {
            tmp[index] = &fake_cb;
        }

        callbacks = tmp;
        callbacks_size += 4;
    }

    callbacks[callbacks_len++] = cb;

 cleanup:

    opal_atomic_unlock(&progress_lock);

    return ret;
}
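
As a usage note (not part of the original source): a progress callback is expected to return the number of events it advanced, and a component would typically register it once during startup. A minimal sketch, with the callback name my_component_progress and its body invented for illustration:

static int my_component_progress(void)
{
    int completed = 0;
    /* poll queues or hardware here and count completed events */
    return completed;
}

/* during component initialization */
if (OPAL_SUCCESS != opal_progress_register(my_component_progress)) {
    /* registration failed; clean up and bail out */
}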
Example #2
int
opal_progress_register(opal_progress_callback_t cb)
{
    int ret = OPAL_SUCCESS;

#if OMPI_HAVE_THREAD_SUPPORT
    opal_atomic_lock(&progress_lock);
#endif

    /* see if we need to allocate more space */
    if (callbacks_len + 1 > callbacks_size) {
        opal_progress_callback_t *tmp;
        tmp = (opal_progress_callback_t*)realloc(callbacks, sizeof(opal_progress_callback_t) * (callbacks_size + 4));
        if (tmp == NULL) {
            ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE;
            goto cleanup;
        }

        callbacks = tmp;
        callbacks_size += 4;
    }

    callbacks[callbacks_len++] = cb;

 cleanup:

#if OMPI_HAVE_THREAD_SUPPORT
    opal_atomic_unlock(&progress_lock);
#endif

    return ret;
}
Example #3
static int
atomic_spinlock_test(opal_atomic_lock_t *lock, int count, int id)
{
    int i;

    for (i = 0 ; i < count ; ++i) {
        opal_atomic_lock(lock);
        if (atomic_verbose) { printf("id %03d has the lock (lock)\n", id); }
        opal_atomic_unlock(lock);

        while (opal_atomic_trylock(lock)) { ; }
        if (atomic_verbose) { printf("id %03d has the lock (trylock)\n", id); }
        opal_atomic_unlock(lock);
    }

    return 0;
}
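
A minimal sketch of driving atomic_spinlock_test from several POSIX threads; the pthread harness, thread count, iteration count, and the opal_atomic_init initializer call are assumptions for illustration, not taken from the original test:

#include <pthread.h>
#include <stdint.h>

#define NTHREADS 4

static opal_atomic_lock_t test_lock;

static void *thread_main(void *arg)
{
    /* each thread hammers the same lock with a distinct id */
    atomic_spinlock_test(&test_lock, 1000, (int) (intptr_t) arg);
    return NULL;
}

int main(void)
{
    pthread_t threads[NTHREADS];
    int i;

    opal_atomic_init(&test_lock, OPAL_ATOMIC_UNLOCKED);
    for (i = 0; i < NTHREADS; ++i) {
        pthread_create(&threads[i], NULL, thread_main, (void *) (intptr_t) i);
    }
    for (i = 0; i < NTHREADS; ++i) {
        pthread_join(threads[i], NULL);
    }
    return 0;
}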
Example #4
int
ompi_osc_sm_rget_accumulate(void *origin_addr, 
                                  int origin_count, 
                                  struct ompi_datatype_t *origin_dt,
                                  void *result_addr, 
                                  int result_count, 
                                  struct ompi_datatype_t *result_dt,
                                  int target, 
                                  MPI_Aint target_disp, 
                                  int target_count,
                                  struct ompi_datatype_t *target_dt, 
                                  struct ompi_op_t *op, 
                                  struct ompi_win_t *win,
                                  struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_sm_request_t *request;
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    void *remote_address;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget_accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    OMPI_OSC_SM_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp;

    opal_atomic_lock(&module->node_states[target].accumulate_lock);

    ret = ompi_datatype_sndrcv(remote_address, target_count, target_dt,
                               result_addr, result_count, result_dt);
    if (OMPI_SUCCESS != ret || op == &ompi_mpi_op_no_op.op) goto done;

    if (op == &ompi_mpi_op_replace.op) {
        ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt,
                                   remote_address, target_count, target_dt);
    } else {
        ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt,
                                      remote_address, target_count, target_dt,
                                      op);
    }

 done:
    opal_atomic_unlock(&module->node_states[target].accumulate_lock);

    OMPI_OSC_SM_REQUEST_COMPLETE(request);

    return ret;
}
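
This function is the shared-memory backend for MPI_Rget_accumulate. At the MPI level, the equivalent request-based call, issued inside an access epoch (window setup and buffers elided), looks like:

MPI_Request req;
MPI_Rget_accumulate(origin_buf, 1, MPI_INT,
                    result_buf, 1, MPI_INT,
                    target_rank, 0 /* target_disp */, 1, MPI_INT,
                    MPI_SUM, win, &req);
MPI_Wait(&req, MPI_STATUS_IGNORE);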
Example #5
int32_t
opal_atomic_sub_32(volatile int32_t *addr, int delta)
{
    int32_t ret;

    opal_atomic_lock(FIND_LOCK(addr));

    ret = (*addr -= delta);

    opal_atomic_unlock(FIND_LOCK(addr));

    return ret;
}
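
FIND_LOCK is presumably a macro that hashes the target address into a small pool of spinlocks (lock striping), so distinct addresses rarely contend while the number of locks stays bounded. A sketch of such a macro; the pool size and hash are arbitrary choices for illustration:

#include <stdint.h>

#define ATOMIC_LOCK_POOL_SIZE 32   /* illustrative; keep it a power of two */

static opal_atomic_lock_t atomic_lock_pool[ATOMIC_LOCK_POOL_SIZE];

/* drop the low alignment bits, then mask into the pool */
#define FIND_LOCK(addr) \
    (&atomic_lock_pool[(((uintptr_t) (addr)) >> 3) & (ATOMIC_LOCK_POOL_SIZE - 1)])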
Example #6
int opal_progress_register_lp (opal_progress_callback_t cb)
{
    int ret;

    opal_atomic_lock(&progress_lock);

    (void) _opal_progress_unregister (cb, callbacks, &callbacks_len);

    ret = _opal_progress_register (cb, &callbacks_lp, &callbacks_lp_size, &callbacks_lp_len);

    opal_atomic_unlock(&progress_lock);

    return ret;
}
Example #7
int
opal_progress_finalize(void)
{
    /* free memory associated with the callbacks */
    opal_atomic_lock(&progress_lock);

    callbacks_len = 0;
    callbacks_size = 0;
    if (NULL != callbacks) {
        free(callbacks);
        callbacks = NULL;
    }

    opal_atomic_unlock(&progress_lock);

    return OPAL_SUCCESS;
}
Example #8
static void opal_progress_finalize (void)
{
    /* free memory associated with the callbacks */
    opal_atomic_lock(&progress_lock);

    callbacks_len = 0;
    callbacks_size = 0;
    free ((void *) callbacks);
    callbacks = NULL;

    callbacks_lp_len = 0;
    callbacks_lp_size = 0;
    free ((void *) callbacks_lp);
    callbacks_lp = NULL;

    opal_atomic_unlock(&progress_lock);
}
Example #9
int opal_progress_unregister (opal_progress_callback_t cb)
{
    int ret;

    opal_atomic_lock(&progress_lock);

    ret = _opal_progress_unregister (cb, callbacks, &callbacks_len);

    if (OPAL_SUCCESS != ret) {
        /* if not in the high-priority array try to remove from the lp array.
         * a callback will never be in both. */
        ret = _opal_progress_unregister (cb, callbacks_lp, &callbacks_lp_len);
    }

    opal_atomic_unlock(&progress_lock);

    return ret;
}
Example #10
int
opal_progress_finalize(void)
{
    /* free memory associated with the callbacks */
#if OPAL_HAVE_THREAD_SUPPORT
    opal_atomic_lock(&progress_lock);
#endif

    callbacks_len = 0;
    callbacks_size = 0;
    if (NULL != callbacks) {
        free(callbacks);
        callbacks = NULL;
    }

#if OPAL_HAVE_THREAD_SUPPORT
    opal_atomic_unlock(&progress_lock);
#endif

    return OPAL_SUCCESS;
}
Example #11
int
opal_progress_unregister(opal_progress_callback_t cb)
{
    size_t i;
    int ret = OPAL_ERR_NOT_FOUND;

#if OPAL_HAVE_THREAD_SUPPORT
    opal_atomic_lock(&progress_lock);
#endif

    for (i = 0 ; i < callbacks_len ; ++i) {
        if (cb == callbacks[i]) {
            callbacks[i] = &fake_cb;
            ret = OPAL_SUCCESS;
            break;
        }
    }
    
    /* If we found the function we're unregistering: If callbacks_len
       is 0, we're not going to do anything interesting anyway, so
       skip.  If callbacks_len is 1, it will soon be 0, so no need to
       do any repacking.  size_t is unsigned, so 0 - 1 is bad for
       a loop condition :). */
    if (OPAL_SUCCESS == ret) {
        if (callbacks_len > 1 ) {
            /* now tightly pack the array */
            for ( ; i < callbacks_len - 1 ; ++i) {
                callbacks[i] = callbacks[i + 1];
            }
        }
        callbacks[callbacks_len - 1] = &fake_cb;
        callbacks_len--;
    }

#if OPAL_HAVE_THREAD_SUPPORT
    opal_atomic_unlock(&progress_lock);
#endif

    return ret;
}
Example #12
int
ompi_osc_sm_fetch_and_op(void *origin_addr,
                         void *result_addr,
                         struct ompi_datatype_t *dt,
                         int target,
                         OPAL_PTRDIFF_TYPE target_disp,
                         struct ompi_op_t *op,
                         struct ompi_win_t *win)
{
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    void *remote_address;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "fetch_and_op: 0x%lx, %s, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, 
                         dt->name, target, (int) target_disp,
                         op->o_name,
                         (unsigned long) win));

    remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp;

    opal_atomic_lock(&module->node_states[target].accumulate_lock);

    /* fetch */
    ompi_datatype_copy_content_same_ddt(dt, 1, (char*) result_addr, (char*) remote_address);
    if (op == &ompi_mpi_op_no_op.op) goto done;

    /* op */
    if (op == &ompi_mpi_op_replace.op) {
        ompi_datatype_copy_content_same_ddt(dt, 1, (char*) remote_address, (char*) origin_addr);
    } else {
        ompi_op_reduce(op, origin_addr, remote_address, 1, dt);
    }

 done:
    opal_atomic_unlock(&module->node_states[target].accumulate_lock);

    return OMPI_SUCCESS;
}
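
At the MPI level this backs MPI_Fetch_and_op. A typical use, atomically incrementing a remote counter inside a passive-target epoch:

int one = 1, prev;
MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, win);
MPI_Fetch_and_op(&one, &prev, MPI_INT, target_rank, 0 /* target_disp */, MPI_SUM, win);
MPI_Win_unlock(target_rank, win);
/* prev holds the counter's value from before the addition */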
Example #13
int
ompi_osc_sm_accumulate(void *origin_addr,
                       int origin_count,
                       struct ompi_datatype_t *origin_dt,
                       int target,
                       OPAL_PTRDIFF_TYPE target_disp,
                       int target_count,
                       struct ompi_datatype_t *target_dt,
                       struct ompi_op_t *op,
                       struct ompi_win_t *win)
{
    int ret;
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    void *remote_address;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp;

    opal_atomic_lock(&module->node_states[target].accumulate_lock);
    if (op == &ompi_mpi_op_replace.op) {
        ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt,
                                    remote_address, target_count, target_dt);
    } else {
        ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt,
                                      remote_address, target_count, target_dt,
                                      op);
    }
    opal_atomic_unlock(&module->node_states[target].accumulate_lock);

    return ret;
}
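
The MPI-level counterpart is MPI_Accumulate; for instance, summing n local doubles into the start of the target's window (epoch management elided):

MPI_Accumulate(local_vec, n, MPI_DOUBLE,
               target_rank, 0 /* target_disp */, n, MPI_DOUBLE,
               MPI_SUM, win);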
Example #14
int
ompi_osc_sm_compare_and_swap(void *origin_addr,
                             void *compare_addr,
                             void *result_addr,
                             struct ompi_datatype_t *dt,
                             int target,
                             OPAL_PTRDIFF_TYPE target_disp,
                             struct ompi_win_t *win)
{
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    void *remote_address;
    size_t size;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "compare_and_swap: 0x%lx, %s, %d, %d, 0x%lx",
                         (unsigned long) origin_addr, 
                         dt->name, target, (int) target_disp,
                         (unsigned long) win));

    remote_address = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp;

    ompi_datatype_type_size(dt, &size);

    opal_atomic_lock(&module->node_states[target].accumulate_lock);

    /* fetch */
    ompi_datatype_copy_content_same_ddt(dt, 1, (char*) result_addr, (char*) remote_address);
    /* compare */
    if (0 == memcmp(result_addr, compare_addr, size)) {
        /* set */
        ompi_datatype_copy_content_same_ddt(dt, 1, (char*) remote_address, (char*) origin_addr);
    }

    opal_atomic_unlock(&module->node_states[target].accumulate_lock);

    return OMPI_SUCCESS;
}
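
This implements MPI_Compare_and_swap, which operates on exactly one element. A sketch of the MPI-level call (epoch management elided):

int desired = 1, expected = 0, observed;
MPI_Compare_and_swap(&desired, &expected, &observed, MPI_INT,
                     target_rank, 0 /* target_disp */, win);
/* the swap took effect iff observed == expected */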
Example #15
/*
 * Lazy initialization of class descriptor.
 */
void opal_class_initialize(opal_class_t *cls)
{
    opal_class_t *c;
    opal_construct_t* cls_construct_array;
    opal_destruct_t* cls_destruct_array;
    int cls_construct_array_count;
    int cls_destruct_array_count;
    int i;

    assert(cls);

    /* Check to see if any other thread got in here and initialized
       this class before we got a chance to */

    if (1 == cls->cls_initialized) {
        return;
    }
    opal_atomic_lock(&class_lock);

    /* If another thread initializing this same class came in at
       roughly the same time, it may have gotten the lock and
       initialized.  So check again. */

    if (1 == cls->cls_initialized) {
        opal_atomic_unlock(&class_lock);
        return;
    }

    /*
     * First calculate depth of class hierarchy
     * And the number of constructors and destructors
     */

    cls->cls_depth = 0;
    cls_construct_array_count = 0;
    cls_destruct_array_count  = 0;
    for (c = cls; c; c = c->cls_parent) {
        if( NULL != c->cls_construct ) {
            cls_construct_array_count++;
        }
        if( NULL != c->cls_destruct ) {
            cls_destruct_array_count++;
        }
        cls->cls_depth++;
    }

    /*
     * Allocate arrays for hierarchy of constructors and destructors
     * plus for each a NULL-sentinel
     */

    cls->cls_construct_array = 
        (void (**)(opal_object_t*))malloc((cls_construct_array_count +
                                           cls_destruct_array_count + 2) *
                                          sizeof(opal_construct_t) );
    if (NULL == cls->cls_construct_array) {
        perror("Out of memory");
        exit(-1);
    }
    cls->cls_destruct_array =
        cls->cls_construct_array + cls_construct_array_count + 1;

    /*
     * The constructor array is reversed, so start at the end
     */

    cls_construct_array = cls->cls_construct_array + cls_construct_array_count;
    cls_destruct_array  = cls->cls_destruct_array;

    c = cls;
    *cls_construct_array = NULL;  /* end marker for the constructors */
    for (i = 0; i < cls->cls_depth; i++) {
        if( NULL != c->cls_construct ) {
            --cls_construct_array;
            *cls_construct_array = c->cls_construct;
        }
        if( NULL != c->cls_destruct ) {
            *cls_destruct_array = c->cls_destruct;
            cls_destruct_array++;
        }
        c = c->cls_parent;
    }
    *cls_destruct_array = NULL;  /* end marker for the destructors */

    cls->cls_initialized = 1;
    save_class(cls);

    /* All done */

    opal_atomic_unlock(&class_lock);
}
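
The check/lock/re-check sequence above is the classic double-checked locking pattern. Its shape in isolation, with hypothetical names:

if (initialized) {                  /* fast path: skip the lock entirely */
    return;
}
opal_atomic_lock(&init_lock);
if (!initialized) {                 /* re-check: another thread may have won */
    do_expensive_init();
    initialized = 1;                /* publish only after init completes */
}
opal_atomic_unlock(&init_lock);

Note that the unlocked fast-path read is not fully safe on weakly ordered architectures without an explicit barrier; the code here leans on the barriers implied by the lock operations.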
Example #16
/*
 * Progress the event library and any functions that have registered to 
 * be called.  We don't propagate errors from the progress functions,
 * so no action is taken if they return failures.  The functions are
 * expected to return the number of events progressed, to determine
 * whether or not we should call sched_yield() during MPI progress.
 * This is only loosely tracked, as an error return can cause the number
 * of progressed events to appear lower than it actually is.  We don't
 * care, as the cost of that happening is far outweighed by the cost
 * of the if checks (they were resulting in bad pipe stalling behavior)
 */
void
opal_progress(void)
{
    size_t i;
    int events = 0;

    if( opal_progress_event_flag != 0 ) {
#if (OMPI_ENABLE_PROGRESS_THREADS == 0) && OPAL_HAVE_WORKING_EVENTOPS
#if OPAL_PROGRESS_USE_TIMERS
#if OPAL_TIMER_USEC_NATIVE
    opal_timer_t now = opal_timer_base_get_usec();
#else
    opal_timer_t now = opal_timer_base_get_cycles();
#endif  /* OPAL_TIMER_USEC_NATIVE */
    /* trip the event library if we've reached our tick rate and we are
       enabled */
        if (now - event_progress_last_time > event_progress_delta ) {
#if OMPI_HAVE_THREAD_SUPPORT
            if (opal_atomic_trylock(&progress_lock)) {
#endif  /* OMPI_HAVE_THREAD_SUPPORT */
                event_progress_last_time = (event_num_mpi_users > 0) ? 
                    now - event_progress_delta : now;

                events += opal_event_loop(opal_progress_event_flag);
#if OMPI_HAVE_THREAD_SUPPORT
                opal_atomic_unlock(&progress_lock);
            }
#endif  /* OMPI_HAVE_THREAD_SUPPORT */
        }

#else /* OPAL_PROGRESS_USE_TIMERS */
    /* trip the event library if we've reached our tick rate and we are
       enabled */
        if (OPAL_THREAD_ADD32(&event_progress_counter, -1) <= 0 ) {
#if OMPI_HAVE_THREAD_SUPPORT
            if (opal_atomic_trylock(&progress_lock)) {
#endif  /* OMPI_HAVE_THREAD_SUPPORT */
                event_progress_counter = 
                    (event_num_mpi_users > 0) ? 0 : event_progress_delta;
                events += opal_event_loop(opal_progress_event_flag);
#if OMPI_HAVE_THREAD_SUPPORT
                opal_atomic_unlock(&progress_lock);
            }
#endif  /* OMPI_HAVE_THREAD_SUPPORT */
        }
#endif /* OPAL_PROGRESS_USE_TIMERS */

#endif /* OMPI_ENABLE_PROGRESS_THREADS == 0 && OPAL_HAVE_WORKING_EVENTOPS */
    }

    /* progress all registered callbacks */
    for (i = 0 ; i < callbacks_len ; ++i) {
        events += (callbacks[i])();
    }

#if defined(__WINDOWS__) || defined(HAVE_SCHED_YIELD)
    if (call_yield && events <= 0) {
        /* If there is nothing to do - yield the processor - otherwise
         * we could consume the processor for the entire time slice. If
         * the processor is oversubscribed - this will result in a best-case
         * latency equivalent to the time-slice.
         */
#if defined(__WINDOWS__)
        SwitchToThread();
#else
        sched_yield();
#endif  /* defined(__WINDOWS__) */
    }
#endif  /* defined(__WINDOWS__) || defined(HAVE_SCHED_YIELD) */
}
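
Callers typically drive opal_progress() in a loop while waiting for some condition to flip; a hedged sketch (the completion flag is illustrative):

/* spin the progress engine until the operation completes */
while (!operation_complete) {
    opal_progress();
}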