int opal_progress_register(opal_progress_callback_t cb)
{
    int ret = OPAL_SUCCESS;
    size_t index;

    opal_atomic_lock(&progress_lock);

    /* see if we need to allocate more space */
    if (callbacks_len + 1 > callbacks_size) {
        opal_progress_callback_t *tmp;
        tmp = (opal_progress_callback_t*) realloc(callbacks,
                  sizeof(opal_progress_callback_t) * (callbacks_size + 4));
        if (tmp == NULL) {
            ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE;
            goto cleanup;
        }

        /* registering fake callbacks to fill callbacks[] */
        for (index = callbacks_len + 1 ; index < callbacks_size + 4 ; index++) {
            tmp[index] = &fake_cb;
        }

        callbacks = tmp;
        callbacks_size += 4;
    }

    callbacks[callbacks_len++] = cb;

 cleanup:
    opal_atomic_unlock(&progress_lock);

    return ret;
}
int opal_progress_register(opal_progress_callback_t cb)
{
    int ret = OPAL_SUCCESS;

#if OMPI_HAVE_THREAD_SUPPORT
    opal_atomic_lock(&progress_lock);
#endif

    /* see if we need to allocate more space */
    if (callbacks_len + 1 > callbacks_size) {
        opal_progress_callback_t *tmp;
        tmp = (opal_progress_callback_t*) realloc(callbacks,
                  sizeof(opal_progress_callback_t) * (callbacks_size + 4));
        if (tmp == NULL) {
            ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE;
            goto cleanup;
        }

        callbacks = tmp;
        callbacks_size += 4;
    }

    callbacks[callbacks_len++] = cb;

 cleanup:
#if OMPI_HAVE_THREAD_SUPPORT
    opal_atomic_unlock(&progress_lock);
#endif

    return ret;
}
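/*
 * Usage sketch (not part of the source above): registering and later
 * unregistering a progress callback.  The names my_progress_cb and
 * example_register are hypothetical; the register/unregister calls and
 * the "return the number of events progressed" convention match the
 * functions shown in this section.
 */
static int my_progress_cb(void)
{
    /* poll a device or queue here; report how many events completed */
    return 0;
}

static void example_register(void)
{
    if (OPAL_SUCCESS != opal_progress_register(my_progress_cb)) {
        /* registration can fail with OPAL_ERR_TEMP_OUT_OF_RESOURCE */
    }
    /* ... later, before the component shuts down ... */
    (void) opal_progress_unregister(my_progress_cb);
}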
static int atomic_spinlock_test(opal_atomic_lock_t *lock, int count, int id)
{
    int i;

    for (i = 0 ; i < count ; ++i) {
        opal_atomic_lock(lock);
        if (atomic_verbose) {
            printf("id %03d has the lock (lock)\n", id);
        }
        opal_atomic_unlock(lock);

        while (opal_atomic_trylock(lock)) { ; }
        if (atomic_verbose) {
            printf("id %03d has the lock (trylock)\n", id);
        }
        opal_atomic_unlock(lock);
    }

    return 0;
}
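/*
 * Usage sketch for the spinlock test above, assuming the classic OPAL
 * atomic API (opal_atomic_lock_init() and the OPAL_ATOMIC_UNLOCKED
 * constant; newer releases spell the constant OPAL_ATOMIC_LOCK_UNLOCKED).
 * The iteration count and id are arbitrary; the real test drives
 * several threads through atomic_spinlock_test() concurrently.
 */
static opal_atomic_lock_t test_lock;

static void run_spinlock_smoke_test(void)
{
    opal_atomic_lock_init(&test_lock, OPAL_ATOMIC_UNLOCKED);
    (void) atomic_spinlock_test(&test_lock, 1000, 0);
}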
int ompi_osc_sm_rget_accumulate(void *origin_addr, int origin_count,
                                struct ompi_datatype_t *origin_dt,
                                void *result_addr, int result_count,
                                struct ompi_datatype_t *result_dt,
                                int target, MPI_Aint target_disp,
                                int target_count,
                                struct ompi_datatype_t *target_dt,
                                struct ompi_op_t *op, struct ompi_win_t *win,
                                struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_sm_request_t *request;
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    void *remote_address;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget_accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    OMPI_OSC_SM_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    remote_address = ((char*) (module->bases[target])) +
        module->disp_units[target] * target_disp;

    opal_atomic_lock(&module->node_states[target].accumulate_lock);

    /* fetch the current target contents into the result buffer */
    ret = ompi_datatype_sndrcv(remote_address, target_count, target_dt,
                               result_addr, result_count, result_dt);
    if (OMPI_SUCCESS != ret || op == &ompi_mpi_op_no_op.op) goto done;

    /* accumulate */
    if (op == &ompi_mpi_op_replace.op) {
        ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt,
                                   remote_address, target_count, target_dt);
    } else {
        ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt,
                                      remote_address, target_count, target_dt,
                                      op);
    }

 done:
    opal_atomic_unlock(&module->node_states[target].accumulate_lock);

    OMPI_OSC_SM_REQUEST_COMPLETE(request);

    return ret;
}
int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta)
{
    int32_t ret;

    opal_atomic_lock(FIND_LOCK(addr));
    ret = (*addr -= delta);
    opal_atomic_unlock(FIND_LOCK(addr));

    return ret;
}
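/*
 * Plausible reconstruction (not the verbatim macro) of the FIND_LOCK
 * pattern used above: when no native atomic instructions exist,
 * read-modify-write updates are serialized through a small table of
 * spinlocks indexed by a hash of the target address, so distinct
 * addresses rarely contend on the same lock.  The table size is a
 * made-up value.
 */
#define ATOMIC_LOCK_TABLE_SIZE 32  /* hypothetical */
static opal_atomic_lock_t atomic_locks[ATOMIC_LOCK_TABLE_SIZE];

#define FIND_LOCK(addr) \
    (&atomic_locks[(((unsigned long) (addr)) >> 2) % ATOMIC_LOCK_TABLE_SIZE])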
int opal_progress_register_lp (opal_progress_callback_t cb)
{
    int ret;

    opal_atomic_lock(&progress_lock);

    /* a callback lives in at most one of the two arrays, so drop any
     * high-priority registration before adding to the low-priority array */
    (void) _opal_progress_unregister (cb, callbacks, &callbacks_len);

    ret = _opal_progress_register (cb, &callbacks_lp, &callbacks_lp_size,
                                   &callbacks_lp_len);

    opal_atomic_unlock(&progress_lock);

    return ret;
}
int opal_progress_finalize(void)
{
    /* free memory associated with the callbacks */
    opal_atomic_lock(&progress_lock);

    callbacks_len = 0;
    callbacks_size = 0;
    if (NULL != callbacks) {
        free(callbacks);
        callbacks = NULL;
    }

    opal_atomic_unlock(&progress_lock);

    return OPAL_SUCCESS;
}
static void opal_progress_finalize (void)
{
    /* free memory associated with the callbacks */
    opal_atomic_lock(&progress_lock);

    callbacks_len = 0;
    callbacks_size = 0;
    free ((void *) callbacks);
    callbacks = NULL;

    callbacks_lp_len = 0;
    callbacks_lp_size = 0;
    free ((void *) callbacks_lp);
    callbacks_lp = NULL;

    opal_atomic_unlock(&progress_lock);
}
int opal_progress_unregister (opal_progress_callback_t cb)
{
    int ret;

    opal_atomic_lock(&progress_lock);

    ret = _opal_progress_unregister (cb, callbacks, &callbacks_len);
    if (OPAL_SUCCESS != ret) {
        /* if not in the high-priority array try to remove from the lp array.
         * a callback will never be in both. */
        ret = _opal_progress_unregister (cb, callbacks_lp, &callbacks_lp_len);
    }

    opal_atomic_unlock(&progress_lock);

    return ret;
}
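/*
 * Plausible sketch of the internal helper called above; the real
 * _opal_progress_unregister lives in opal_progress.c and its details
 * here are assumptions.  It removes a callback from one of the arrays
 * and repacks the tail so active entries stay contiguous, matching the
 * repacking done by the older opal_progress_unregister() shown later
 * in this section.
 */
static int _opal_progress_unregister (opal_progress_callback_t cb,
                                      opal_progress_callback_t *array,
                                      size_t *len)
{
    size_t i, j;

    for (i = 0 ; i < *len ; ++i) {
        if (cb != array[i]) {
            continue;
        }
        /* shift the tail down so active entries stay contiguous */
        for (j = i + 1 ; j < *len ; ++j) {
            array[j - 1] = array[j];
        }
        --*len;
        return OPAL_SUCCESS;
    }

    return OPAL_ERR_NOT_FOUND;
}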
int opal_progress_finalize(void)
{
    /* free memory associated with the callbacks */
#if OPAL_HAVE_THREAD_SUPPORT
    opal_atomic_lock(&progress_lock);
#endif

    callbacks_len = 0;
    callbacks_size = 0;
    if (NULL != callbacks) {
        free(callbacks);
        callbacks = NULL;
    }

#if OPAL_HAVE_THREAD_SUPPORT
    opal_atomic_unlock(&progress_lock);
#endif

    return OPAL_SUCCESS;
}
int opal_progress_unregister(opal_progress_callback_t cb)
{
    size_t i;
    int ret = OPAL_ERR_NOT_FOUND;

#if OPAL_HAVE_THREAD_SUPPORT
    opal_atomic_lock(&progress_lock);
#endif

    for (i = 0 ; i < callbacks_len ; ++i) {
        if (cb == callbacks[i]) {
            callbacks[i] = &fake_cb;
            ret = OPAL_SUCCESS;
            break;
        }
    }

    /* If we found the function we're unregistering: If callbacks_len
       is 0, we're not going to do anything interesting anyway, so
       skip.  If callbacks_len is 1, it will soon be 0, so no need to
       do any repacking.  size_t can be unsigned, so 0 - 1 is bad for
       a loop condition :). */
    if (OPAL_SUCCESS == ret) {
        if (callbacks_len > 1) {
            /* now tightly pack the array */
            for ( ; i < callbacks_len - 1 ; ++i) {
                callbacks[i] = callbacks[i + 1];
            }
        }
        callbacks[callbacks_len - 1] = &fake_cb;
        callbacks_len--;
    }

#if OPAL_HAVE_THREAD_SUPPORT
    opal_atomic_unlock(&progress_lock);
#endif

    return ret;
}
int ompi_osc_sm_fetch_and_op(void *origin_addr, void *result_addr,
                             struct ompi_datatype_t *dt, int target,
                             OPAL_PTRDIFF_TYPE target_disp,
                             struct ompi_op_t *op, struct ompi_win_t *win)
{
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    void *remote_address;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "fetch_and_op: 0x%lx, %s, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr,
                         dt->name, target, (int) target_disp,
                         op->o_name,
                         (unsigned long) win));

    remote_address = ((char*) (module->bases[target])) +
        module->disp_units[target] * target_disp;

    opal_atomic_lock(&module->node_states[target].accumulate_lock);

    /* fetch */
    ompi_datatype_copy_content_same_ddt(dt, 1, (char*) result_addr,
                                        (char*) remote_address);
    if (op == &ompi_mpi_op_no_op.op) goto done;

    /* op */
    if (op == &ompi_mpi_op_replace.op) {
        ompi_datatype_copy_content_same_ddt(dt, 1, (char*) remote_address,
                                            (char*) origin_addr);
    } else {
        ompi_op_reduce(op, origin_addr, remote_address, 1, dt);
    }

 done:
    opal_atomic_unlock(&module->node_states[target].accumulate_lock);

    return OMPI_SUCCESS;
}
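/*
 * MPI-level call that reaches ompi_osc_sm_fetch_and_op() when the
 * window is backed by the sm one-sided component.  The buffer names,
 * displacement, and window setup are illustrative only.
 */
#include <mpi.h>

static void fetch_and_add_example(MPI_Win win, int target)
{
    long one = 1, prev = 0;

    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
    /* atomically: prev = counter at target; counter at target += 1 */
    MPI_Fetch_and_op(&one, &prev, MPI_LONG, target, 0, MPI_SUM, win);
    MPI_Win_unlock(target, win);
}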
int ompi_osc_sm_accumulate(void *origin_addr, int origin_count,
                           struct ompi_datatype_t *origin_dt,
                           int target, OPAL_PTRDIFF_TYPE target_disp,
                           int target_count,
                           struct ompi_datatype_t *target_dt,
                           struct ompi_op_t *op, struct ompi_win_t *win)
{
    int ret;
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    void *remote_address;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    remote_address = ((char*) (module->bases[target])) +
        module->disp_units[target] * target_disp;

    opal_atomic_lock(&module->node_states[target].accumulate_lock);

    if (op == &ompi_mpi_op_replace.op) {
        ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt,
                                   remote_address, target_count, target_dt);
    } else {
        ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt,
                                      remote_address, target_count, target_dt,
                                      op);
    }

    opal_atomic_unlock(&module->node_states[target].accumulate_lock);

    return ret;
}
int ompi_osc_sm_compare_and_swap(void *origin_addr, void *compare_addr,
                                 void *result_addr,
                                 struct ompi_datatype_t *dt,
                                 int target, OPAL_PTRDIFF_TYPE target_disp,
                                 struct ompi_win_t *win)
{
    ompi_osc_sm_module_t *module =
        (ompi_osc_sm_module_t*) win->w_osc_module;
    void *remote_address;
    size_t size;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "compare_and_swap: 0x%lx, %s, %d, %d, 0x%lx",
                         (unsigned long) origin_addr,
                         dt->name, target, (int) target_disp,
                         (unsigned long) win));

    remote_address = ((char*) (module->bases[target])) +
        module->disp_units[target] * target_disp;

    ompi_datatype_type_size(dt, &size);

    opal_atomic_lock(&module->node_states[target].accumulate_lock);

    /* fetch */
    ompi_datatype_copy_content_same_ddt(dt, 1, (char*) result_addr,
                                        (char*) remote_address);
    /* compare */
    if (0 == memcmp(result_addr, compare_addr, size)) {
        /* set */
        ompi_datatype_copy_content_same_ddt(dt, 1, (char*) remote_address,
                                            (char*) origin_addr);
    }

    opal_atomic_unlock(&module->node_states[target].accumulate_lock);

    return OMPI_SUCCESS;
}
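/*
 * Corresponding MPI-level call for the lock-based compare-and-swap
 * above (same illustrative setup as the fetch-and-op example); the
 * spin-flag acquisition shown is only a usage sketch.
 */
static void try_acquire_flag(MPI_Win win, int target)
{
    int unlocked = 0, locked = 1, prev = -1;

    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
    /* swap in `locked` only if the target flag currently equals `unlocked` */
    MPI_Compare_and_swap(&locked, &unlocked, &prev, MPI_INT, target, 0, win);
    MPI_Win_unlock(target, win);
    /* prev == unlocked means this process won the race */
}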
/*
 * Lazy initialization of class descriptor.
 */
void opal_class_initialize(opal_class_t *cls)
{
    opal_class_t *c;
    opal_construct_t* cls_construct_array;
    opal_destruct_t* cls_destruct_array;
    int cls_construct_array_count;
    int cls_destruct_array_count;
    int i;

    assert(cls);

    /* Check to see if any other thread got in here and initialized
       this class before we got a chance to */
    if (1 == cls->cls_initialized) {
        return;
    }
    opal_atomic_lock(&class_lock);

    /* If another thread initializing this same class came in at
       roughly the same time, it may have gotten the lock and
       initialized.  So check again. */
    if (1 == cls->cls_initialized) {
        opal_atomic_unlock(&class_lock);
        return;
    }

    /*
     * First calculate depth of class hierarchy
     * And the number of constructors and destructors
     */
    cls->cls_depth = 0;
    cls_construct_array_count = 0;
    cls_destruct_array_count = 0;
    for (c = cls; c; c = c->cls_parent) {
        if (NULL != c->cls_construct) {
            cls_construct_array_count++;
        }
        if (NULL != c->cls_destruct) {
            cls_destruct_array_count++;
        }
        cls->cls_depth++;
    }

    /*
     * Allocate arrays for hierarchy of constructors and destructors
     * plus for each a NULL-sentinel
     */
    cls->cls_construct_array =
        (void (**)(opal_object_t*)) malloc((cls_construct_array_count +
                                            cls_destruct_array_count + 2) *
                                           sizeof(opal_construct_t));
    if (NULL == cls->cls_construct_array) {
        perror("Out of memory");
        exit(-1);
    }
    cls->cls_destruct_array =
        cls->cls_construct_array + cls_construct_array_count + 1;

    /*
     * The constructor array is reversed, so start at the end
     */
    cls_construct_array = cls->cls_construct_array + cls_construct_array_count;
    cls_destruct_array = cls->cls_destruct_array;

    c = cls;
    *cls_construct_array = NULL;  /* end marker for the constructors */
    for (i = 0; i < cls->cls_depth; i++) {
        if (NULL != c->cls_construct) {
            --cls_construct_array;
            *cls_construct_array = c->cls_construct;
        }
        if (NULL != c->cls_destruct) {
            *cls_destruct_array = c->cls_destruct;
            cls_destruct_array++;
        }
        c = c->cls_parent;
    }
    *cls_destruct_array = NULL;  /* end marker for the destructors */

    cls->cls_initialized = 1;
    save_class(cls);

    /* All done */
    opal_atomic_unlock(&class_lock);
}
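/*
 * How the lazy initialization above is reached in practice: the first
 * OBJ_NEW()/OBJ_CONSTRUCT() on a class calls opal_class_initialize()
 * through the class descriptor.  The example class below is
 * illustrative, not taken from the tree.
 */
struct my_item_t {
    opal_object_t super;
    int value;
};
typedef struct my_item_t my_item_t;
OBJ_CLASS_DECLARATION(my_item_t);
OBJ_CLASS_INSTANCE(my_item_t, opal_object_t, NULL, NULL);

static void make_item(void)
{
    my_item_t *item = OBJ_NEW(my_item_t);  /* first use initializes the class */
    item->value = 42;
    OBJ_RELEASE(item);
}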
/*
 * Progress the event library and any functions that have registered to
 * be called.  We don't propagate errors from the progress functions,
 * so no action is taken if they return failures.  The functions are
 * expected to return the number of events progressed, to determine
 * whether or not we should call sched_yield() during MPI progress.
 * This is only loosely tracked, as an error return can cause the number
 * of progressed events to appear lower than it actually is.  We don't
 * care, as the cost of that happening is far outweighed by the cost
 * of the if checks (they were resulting in bad pipe stalling behavior)
 */
void opal_progress(void)
{
    size_t i;
    int events = 0;

    if (opal_progress_event_flag != 0) {
#if (OMPI_ENABLE_PROGRESS_THREADS == 0) && OPAL_HAVE_WORKING_EVENTOPS
#if OPAL_PROGRESS_USE_TIMERS
#if OPAL_TIMER_USEC_NATIVE
        opal_timer_t now = opal_timer_base_get_usec();
#else
        opal_timer_t now = opal_timer_base_get_cycles();
#endif  /* OPAL_TIMER_USEC_NATIVE */
        /* trip the event library if we've reached our tick rate and
           we are enabled */
        if (now - event_progress_last_time > event_progress_delta) {
#if OMPI_HAVE_THREAD_SUPPORT
            if (opal_atomic_trylock(&progress_lock)) {
#endif  /* OMPI_HAVE_THREAD_SUPPORT */
                event_progress_last_time = (event_num_mpi_users > 0) ?
                    now - event_progress_delta : now;

                events += opal_event_loop(opal_progress_event_flag);
#if OMPI_HAVE_THREAD_SUPPORT
                opal_atomic_unlock(&progress_lock);
            }
#endif  /* OMPI_HAVE_THREAD_SUPPORT */
        }

#else /* OPAL_PROGRESS_USE_TIMERS */
        /* trip the event library if we've reached our tick rate and
           we are enabled */
        if (OPAL_THREAD_ADD32(&event_progress_counter, -1) <= 0) {
#if OMPI_HAVE_THREAD_SUPPORT
            if (opal_atomic_trylock(&progress_lock)) {
#endif  /* OMPI_HAVE_THREAD_SUPPORT */
                event_progress_counter = (event_num_mpi_users > 0) ?
                    0 : event_progress_delta;

                events += opal_event_loop(opal_progress_event_flag);
#if OMPI_HAVE_THREAD_SUPPORT
                opal_atomic_unlock(&progress_lock);
            }
#endif  /* OMPI_HAVE_THREAD_SUPPORT */
        }
#endif /* OPAL_PROGRESS_USE_TIMERS */
#endif /* OMPI_ENABLE_PROGRESS_THREADS == 0 && OPAL_HAVE_WORKING_EVENTOPS */
    }

    /* progress all registered callbacks */
    for (i = 0 ; i < callbacks_len ; ++i) {
        events += (callbacks[i])();
    }

#if defined(__WINDOWS__) || defined(HAVE_SCHED_YIELD)
    if (call_yield && events <= 0) {
        /* If there is nothing to do - yield the processor - otherwise
         * we could consume the processor for the entire time slice.  If
         * the processor is oversubscribed - this will result in a best-case
         * latency equivalent to the time-slice. */
#if defined(__WINDOWS__)
        SwitchToThread();
#else
        sched_yield();
#endif  /* defined(__WINDOWS__) */
    }
#endif  /* defined(__WINDOWS__) || defined(HAVE_SCHED_YIELD) */
}
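/*
 * Typical consumer of opal_progress(): spin on a completion flag,
 * driving progress on each iteration so the event library and the
 * registered callbacks keep running.  The flag name is illustrative;
 * OMPI wraps variants of this loop in its request-completion paths.
 */
static void wait_for_completion(volatile int *flag)
{
    while (0 == *flag) {
        opal_progress();
    }
}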