/* Returns the number of nodes in the given group.
 * Only the world group (NULL is treated as world) is supported; any
 * other group aborts via OSPU_ERR_ABORT and returns -1 on the fail path. */
int OSPD_Node_total(OSP_group_t* group)
{
    int total;

    OSPU_FUNC_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        total = OSPD_Process_info.num_nodes;
        goto fn_exit;
    }
    else
    {
        total = -1;
        /* FIX: message previously read "not implement" */
        OSPU_ERR_ABORT(1, "OSPD_Node_total not implemented for non-world groups!");
        goto fn_fail;
    }

  fn_exit:
    OSPU_FUNC_EXIT();
    return total;

  fn_fail:
    goto fn_exit;
}
int OSPD_Finalize(void) { int status = OSP_SUCCESS; int count = 0; OSPU_FUNC_ENTER(); /* TODO: need to unset "OSP is alive" global variable */ OSPDI_CRITICAL_ENTER(); /*waiting for everyone*/ status = OSPDI_GlobalBarrier(); OSPU_ERR_POP(status != OSP_SUCCESS, "OSPDI_GlobalBarrier returned with an error"); /* Freeing request pool */ OSPDI_Request_pool_finalize(); /* Freeing handle pool */ OSPDI_Handle_pool_finalize(); /* Freeing buffer pool */ OSPDI_Buffer_pool_finalize(); /* Freeing memory region pointers and local memroy region*/ OSPDI_Free(OSPD_Membase_global); OSPDI_Free(OSPD_Memregion_global); /* Freeing conenction active counters */ OSPDI_Free((void *) OSPD_Connection_send_active); OSPDI_Free((void *) OSPD_Connection_put_active); /* Freeing put flush local counters and pointers */ OSPDI_Free(OSPD_Put_Flushcounter_ptr[OSPD_Process_info.my_rank]); OSPDI_Free(OSPD_Put_Flushcounter_ptr); if (ospd_settings.enable_cht) { status = pthread_cancel(OSPDI_CHT_pthread); } OSPDI_CRITICAL_EXIT(); /* NOTE: exit critical section before finalize since CS may not work after DCMF is terminated */ count = DCMF_Messager_finalize(); /* Do not issue this warning if using MPI since in that case we know DCMF will be initialized by MPI before OSP (assuming GA->ARMCI->OSP call path). */ //if(!ospd_settings.mpi_active) //{ // OSPU_WARNING(count == 0, // "DCMF_Messager_finalize has been called more than once."); //} fn_exit: OSPU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/* Releases remotely accessible segments for the given group.
 * This function does nothing because BG does not involve any registration;
 * it only performs a barrier synchronization so that everyone agrees on
 * the release.  Non-world groups are not implemented. */
int OSPD_Release_segments(OSP_group_t* group, void *ptr)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        status = OSPDI_GlobalBarrier();
        /* FIX: message previously named DCMF_GlobalBarrier, but the call
         * above is OSPDI_GlobalBarrier */
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                       "OSPDI_GlobalBarrier returned with an error");
        goto fn_exit;
    }
    else
    {
        OSPU_ERR_POP(1, "OSPD_Release_segments not implemented for non-world groups!");
        goto fn_fail;
    }

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Blocking barrier over the given group.  Only the world group (or NULL,
 * treated as world) is supported; non-world groups pop an error. */
int OSPD_Barrier_group(OSP_group_t* group)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        status = OSPDI_GlobalBarrier();
        /* FIX: message previously named DCMF_GlobalBarrier, but the call
         * above is OSPDI_GlobalBarrier */
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                       "OSPDI_GlobalBarrier returned with an error");
        goto fn_exit;
    }
    else
    {
        OSPU_ERR_POP(1, "OSPD_Barrier_group not implemented for non-world groups!");
        goto fn_fail;
    }

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Non-blocking vector put-accumulate: forwards the I/O-vector list to the
 * direct putaccv implementation under the device critical section, tying
 * completion to the caller-supplied handle. */
int OSPD_NbPutAccV(int target, OSP_iov_t *iov_ar, int ar_len, OSP_datatype_t osp_type, void* scaling, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    OSPD_Handle_t *handle;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    handle = (OSPD_Handle_t *) osp_handle;

    status = OSPDI_Direct_putaccv(target,
                                  iov_ar,
                                  ar_len,
                                  osp_type,
                                  scaling,
                                  handle);
    OSPU_ERR_POP(status, "Direct putaccv function returned with an error \n");

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Broadcasts count bytes from root over the given group.  Only the world
 * group (NULL is shorthand for world) is supported. */
int OSPD_Bcast_group(OSP_group_t* group, int root, int count, void* buffer)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    /* Guard: reject anything that is not the world group. */
    if (group != OSP_GROUP_WORLD && group != NULL)
    {
        OSPU_ERR_POP(1, "OSPD_Bcast_group not implemented for non-world groups!");
        goto fn_fail;
    }

    status = OSPDI_GlobalBcast(root, count, buffer);
    OSPU_ERR_ABORT(status != OSP_SUCCESS,
                   "OSPDI_GlobalBcast returned with an error");

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Public non-blocking get.  Small transfers to self are served by a
 * blocking local memcpy; everything else goes to the device layer. */
int OSP_NbGet(int target, void* src, void* dst, int bytes, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    int my_rank = OSPD_Process_id(OSP_GROUP_WORLD);
    int use_bypass;

    OSPU_FUNC_ENTER();

#   ifdef HAVE_ERROR_CHECKING
#   endif

#   ifdef OSP_TAU_PROFILING
    {
        TAU_TRACE_SENDMSG (OSP_TAU_TAG_NBGET, target, bytes);
    }
#   endif

    /* Bypass only when the target is this rank AND the transfer is below
       the 1-D bypass threshold. */
    use_bypass = (target == my_rank)
              && (bytes < ospu_settings.network_bypass_upper_limit_1d);

    if (use_bypass)
    {
        status = OSPU_Get_memcpy(src, dst, bytes);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPU_Get_memcpy returned an error\n");
    }
    else
    {
        status = OSPD_NbGet(target, src, dst, bytes, osp_handle);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPD_NbGet returned an error\n");
    }

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Non-blocking allreduce over the given group.  Currently unimplemented
 * for all groups; both branches pop an error. */
int OSPD_NbAllreduce_group(OSP_group_t* group, int count, OSP_reduce_op_t osp_op, OSP_datatype_t osp_type, void* in, void* out, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    /* FIX: removed unused local "ospd_handle" (never read or written) */

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        OSPU_ERR_POP(1, "OSPDI_NbAllreduce has not been implemented \n");
    }
    else
    {
        OSPU_ERR_POP(1, "OSPD_NbAllreduce_group not implemented for non-world groups!");
    }

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
int OSPDI_GlobalBarrier() { int status = OSP_SUCCESS; DCMF_Request_t request; DCMF_Callback_t done_callback; volatile int active; OSPU_FUNC_ENTER(); active = 1; done_callback.function = OSPDI_Generic_done; done_callback.clientdata = (void *) &active; status = DCMF_GlobalBarrier(&OSPD_GlobalBarrier_protocol, &request, done_callback); OSPU_ERR_ABORT(status != DCMF_SUCCESS, "DCMF_GlobalBarrier returned with an error"); OSPDI_Conditional_advance(active > 0); fn_exit: OSPU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/* Blocking allreduce over the given group.  Only the world group (NULL is
 * shorthand for world) is supported. */
int OSPD_Allreduce_group(OSP_group_t* group, int count, OSP_reduce_op_t osp_op, OSP_datatype_t osp_type, void* in, void* out)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    /* Guard: reject anything that is not the world group. */
    if (group != OSP_GROUP_WORLD && group != NULL)
    {
        OSPU_ERR_POP(1, "OSPD_Allreduce_group not implemented for non-world groups!");
        goto fn_fail;
    }

    status = OSPDI_GlobalAllreduce(count, osp_op, osp_type, in, out);
    OSPU_ERR_ABORT(status != OSP_SUCCESS,
                   "OSPDI_GlobalAllreduce returned with an error");

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Returns the calling process's rank within the given group.
 * Only the world group (NULL is treated as world) is supported; any other
 * group aborts via OSPU_ERR_ABORT and returns -1 on the fail path. */
int OSPD_Process_id(OSP_group_t* group)
{
    int id;

    OSPU_FUNC_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        id = OSPD_Process_info.my_rank;
        goto fn_exit;
    }
    else
    {
        id = -1;
        /* FIX: message previously read "not implement" */
        OSPU_ERR_ABORT(1, "OSPD_Process_id not implemented for non-world groups!");
        goto fn_fail;
    }

  fn_exit:
    OSPU_FUNC_EXIT();
    return id;

  fn_fail:
    goto fn_exit;
}
/* Non-blocking global barrier: allocates a request, binds it to the
 * caller's handle (incrementing the handle's active count), and posts a
 * DCMF global barrier whose completion callback retires the request. */
int OSPDI_NbGlobalBarrier(OSPD_Handle_t *ospd_handle)
{
    int status = OSP_SUCCESS;
    OSPD_Request_t *ospd_request;
    DCMF_Callback_t done_callback;
    /* FIX: removed unused local "volatile int active" — completion is
     * tracked through the request/handle, not a spin flag */

    OSPU_FUNC_ENTER();

    ospd_request = OSPDI_Get_request(1);
    OSPU_ERR_POP(status = (ospd_request == NULL),
                 "OSPDI_Get_request returned error \n");
    OSPDI_Set_handle(ospd_request, ospd_handle);
    ospd_handle->active++;

    done_callback.function = OSPDI_Request_done;
    done_callback.clientdata = (void *) ospd_request;

    status = DCMF_GlobalBarrier(&OSPD_GlobalBarrier_protocol,
                                &(ospd_request->request),
                                done_callback);
    OSPU_ERR_ABORT(status != DCMF_SUCCESS,
                   "DCMF_GlobalBarrier returned with an error");

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
int OSPDI_GlobalAllreduce_initialize() { int i,status = OSP_SUCCESS; OSPU_FUNC_ENTER(); barrier_conf.protocol = DCMF_GI_BARRIER_PROTOCOL; barrier_conf.cb_geometry = getGeometry; status = DCMF_Barrier_register(&OSPD_Barrier_protocol, &barrier_conf); barrier_conf.protocol = DCMF_LOCKBOX_BARRIER_PROTOCOL; barrier_conf.cb_geometry = getGeometry; status = DCMF_Barrier_register(&OSPD_Localbarrier_protocol, &barrier_conf); /*This has to eventually freed, not being done now*/ status = OSPDI_Malloc((void **) &allreduce_ranklist, OSPD_Process_info.num_ranks * sizeof(unsigned)); OSPU_ERR_POP(status != 0, "OSPDI_Malloc returned with error %d \n", status); for(i=0; i<OSPD_Process_info.num_ranks; i++) allreduce_ranklist[i] = i; barrier_ptr = &OSPD_Barrier_protocol; localbarrier_ptr = &OSPD_Localbarrier_protocol; status = DCMF_Geometry_initialize(&geometry, 0, allreduce_ranklist, OSPD_Process_info.num_ranks, &barrier_ptr, 1, &localbarrier_ptr, 1, &crequest, 0, 1); allreduce_conf.protocol = DCMF_TORUS_BINOMIAL_ALLREDUCE_PROTOCOL; allreduce_conf.cb_geometry = getGeometry; allreduce_conf.reuse_storage = 1; status = DCMF_Allreduce_register(&OSPD_GlobalAllreduce_protocol, &allreduce_conf); OSPU_ERR_POP(status != DCMF_SUCCESS, "DCMF_Allreduce_register returned with error %d \n", status); fn_exit: OSPU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/* Recursively walks a strided transfer, peeling one stride level per call
 * until level 0, where a contiguous non-blocking put-accumulate of
 * block_sizes[0] bytes is issued. */
int OSPI_Recursive_PutAcc(int target, int stride_level, int *block_sizes, void* source_ptr, int *src_stride_ar, void* target_ptr, int *trg_stride_ar, OSP_datatype_t osp_type, void* scaling, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    int idx;

    OSPU_FUNC_ENTER();

    if (stride_level <= 0)
    {
        /* Base case: a single contiguous chunk. */
        status = OSPD_NbPutAcc(target,
                               source_ptr,
                               target_ptr,
                               block_sizes[0],
                               osp_type,
                               scaling,
                               osp_handle);
        OSPU_ERR_POP(status != OSP_SUCCESS,
                     "OSPD_NbPutAcc returned with an error \n");
    }
    else
    {
        for (idx = 0; idx < block_sizes[stride_level]; idx++)
        {
            /* Advance both pointers by one stride at the next-lower level. */
            char *src = (char *) source_ptr + idx * src_stride_ar[stride_level - 1];
            char *dst = (char *) target_ptr + idx * trg_stride_ar[stride_level - 1];

            status = OSPI_Recursive_PutAcc(target,
                                           stride_level - 1,
                                           block_sizes,
                                           (void *) src,
                                           src_stride_ar,
                                           (void *) dst,
                                           trg_stride_ar,
                                           osp_type,
                                           scaling,
                                           osp_handle);
            OSPU_ERR_POP(status != OSP_SUCCESS,
                         "OSPI_Recursive_PutAcc returned error in OSPI_Recursive_PutAcc.\n");
        }
    }

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Wall-clock time in seconds.  Stub: not yet implemented on this device,
 * so it always returns 0.0. */
double OSPD_Time_seconds()
{
    OSPU_FUNC_ENTER();

    /* TODO: implement this function */

  fn_exit:
    OSPU_FUNC_EXIT();
    return 0.0;

  fn_fail:
    goto fn_exit;
}
/* Cycle counter.  Stub: not yet implemented on this device, so it always
 * returns 0. */
unsigned long long OSPD_Time_cycles()
{
    OSPU_FUNC_ENTER();

    /* TODO: implement this function */

  fn_exit:
    OSPU_FUNC_EXIT();
    return 0;

  fn_fail:
    goto fn_exit;
}
/* Aborts the program on behalf of the user, reporting the supplied error
 * code and message.  The assignment inside the macro argument makes the
 * condition non-zero so the abort always fires. */
void OSPD_Abort(int error_code, char error_message[])
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    OSPU_ERR_ABORT(status = OSP_ERROR,
                   "User called OSP_ABORT with error code %d, error msg: %s Program terminating abnormally \n",
                   error_code,
                   error_message);

  fn_exit:
    OSPU_FUNC_EXIT();
    return;

  fn_fail:
    goto fn_exit;
}
/* Public non-blocking vector put-accumulate.  Transfers to self are served
 * by a blocking local accumulate when bypass is enabled; everything else
 * goes to the device layer. */
int OSP_NbPutAccV(int target, OSP_iov_t *iov_ar, int ar_len, OSP_datatype_t osp_type, void* scaling, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    int my_rank = OSPD_Process_id(OSP_GROUP_WORLD);
    int local_bypass;

    OSPU_FUNC_ENTER();

#   ifdef HAVE_ERROR_CHECKING
#   endif

#   ifdef OSP_TAU_PROFILING
    {
        int i, total_bytes = 0;
        for (i = 0; i < ar_len; i++)
            total_bytes += iov_ar[i].ptr_array_len * iov_ar[i].bytes;
        TAU_TRACE_SENDMSG (OSP_TAU_TAG_NBPUTACCV, target, total_bytes);
    }
#   endif

    /* Bypass is ALWAYS better for accumulate; we do not test against
       a size threshold, only the global bypass switch. */
    local_bypass = (target == my_rank) && ospu_settings.network_bypass;

    if (local_bypass)
    {
        status = OSPU_AccV_memcpy(iov_ar, ar_len, osp_type, scaling);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPU_AccV_memcpy returned an error\n");
    }
    else
    {
        status = OSPD_NbPutAccV(target, iov_ar, ar_len, osp_type, scaling, osp_handle);
        OSPU_ERR_POP(status, "OSPD_NbPutAccV returned error\n");
    }

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Local get: copies bytes from src into dst with memcpy.
 * Always returns OSP_SUCCESS. */
int OSPU_Get_local(void* src, void* dst, unsigned bytes)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    memcpy(dst, src, bytes);

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Non-blocking get: allocates a request bound to the caller's handle and
 * posts a DCMF_Get from the target's registered region into the local
 * region.  Displacements are computed against each rank's registered
 * memory base. */
int OSPD_NbGet(int target, void* src, void* dst, int bytes, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    OSPD_Handle_t* ospd_handle = NULL;
    OSPD_Request_t* ospd_request = NULL;
    DCMF_Callback_t callback;
    unsigned src_disp, dst_disp;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    /* FIX: acquire the request before bumping the handle's active count,
     * so a failed allocation no longer leaves the count permanently
     * elevated (matches the ordering used by OSPDI_NbGlobalBarrier and
     * OSPDI_NbGlobalBcast). */
    ospd_request = OSPDI_Get_request(1);
    OSPU_ERR_POP(status = (ospd_request == NULL),
                 "OSPDI_Get_request returned error.");

    ospd_handle = (OSPD_Handle_t *) osp_handle;
    OSPDI_Set_handle(ospd_request, ospd_handle);
    ospd_handle->active++;

    callback.function = OSPDI_Request_done;
    callback.clientdata = (void *) ospd_request;

    src_disp = (size_t) src - (size_t) OSPD_Membase_global[target];
    dst_disp = (size_t) dst - (size_t) OSPD_Membase_global[OSPD_Process_info.my_rank];

    status = DCMF_Get(&OSPD_Generic_get_protocol,
                      &(ospd_request->request),
                      callback,
                      DCMF_RELAXED_CONSISTENCY,
                      target,
                      bytes,
                      &OSPD_Memregion_global[target],
                      &OSPD_Memregion_global[OSPD_Process_info.my_rank],
                      src_disp,
                      dst_disp);
    OSPU_ERR_POP(status, "DCMF_Get returned with an error \n");

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Public wall-clock timer: thin wrapper around the device-layer timer. */
double OSP_Time_seconds(void)
{
    OSPU_FUNC_ENTER();

#   ifdef HAVE_ERROR_CHECKING
#   endif

    /* FIXME: The profiling interface needs to go here */
    /* FIXME: Locking functionality needs to go here */

  fn_exit:
    OSPU_FUNC_EXIT();
    return OSPD_Time_seconds();

  fn_fail:
    goto fn_exit;
}
/* Public node-count query: thin wrapper around the device-layer count. */
int OSP_Node_total(OSP_group_t* group)
{
    OSPU_FUNC_ENTER();

#   ifdef HAVE_ERROR_CHECKING
#   endif

    /* FIXME: The profiling interface needs to go here */
    /* FIXME: Locking functionality needs to go here */

  fn_exit:
    OSPU_FUNC_EXIT();
    return OSPD_Node_total(group);

  fn_fail:
    goto fn_exit;
}
/* Public cycle counter: thin wrapper around the device-layer counter. */
unsigned long long OSP_Time_cycles(void)
{
    OSPU_FUNC_ENTER();

    /* FIXME: The profiling interface needs to go here */
    /* FIXME: Locking functionality needs to go here */

#   ifdef HAVE_ERROR_CHECKING
#   endif

  fn_exit:
    OSPU_FUNC_EXIT();
    return OSPD_Time_cycles();

  fn_fail:
    goto fn_exit;
}
/* Frees a locally allocated segment under the device critical section.
 * Always returns OSP_SUCCESS. */
int OSPD_Free_segment(void *ptr)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    OSPDI_Free(ptr);

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Registers the generic get protocol (default DCMF get over the torus
 * network).  Returns OSP_SUCCESS or an error status. */
int OSPDI_Get_initialize()
{
    int status = OSP_SUCCESS;
    DCMF_Get_Configuration_t get_conf;

    OSPU_FUNC_ENTER();

    get_conf.protocol = DCMF_DEFAULT_GET_PROTOCOL;
    get_conf.network = DCMF_TORUS_NETWORK;

    status = DCMF_Get_register(&OSPD_Generic_get_protocol, &get_conf);
    OSPU_ERR_POP(status != DCMF_SUCCESS, "DCMF_Get_register failed");

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
int OSPD_Get(int target, void* src, void* dst, int bytes) { int status = OSP_SUCCESS; DCMF_Request_t request; DCMF_Callback_t callback; volatile int active; unsigned src_disp, dst_disp; OSPU_FUNC_ENTER(); OSPDI_CRITICAL_ENTER(); callback.function = OSPDI_Generic_done; callback.clientdata = (void *) &active; src_disp = (size_t) src - (size_t) OSPD_Membase_global[target]; dst_disp = (size_t) dst - (size_t) OSPD_Membase_global[OSPD_Process_info.my_rank]; active = 1; status = DCMF_Get(&OSPD_Generic_get_protocol, &request, callback, DCMF_RELAXED_CONSISTENCY, target, bytes, &OSPD_Memregion_global[target], &OSPD_Memregion_global[OSPD_Process_info.my_rank], src_disp, dst_disp); OSPU_ERR_POP(status, "DCMF_Get returned with an error"); OSPDI_Conditional_advance(active > 0); fn_exit: OSPDI_CRITICAL_EXIT(); OSPU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/* Registers the default DCMF global barrier protocol.
 * Returns OSP_SUCCESS or an error status. */
int OSPDI_GlobalBarrier_initialize()
{
    int status = OSP_SUCCESS;
    DCMF_GlobalBarrier_Configuration_t gb_conf;

    OSPU_FUNC_ENTER();

    gb_conf.protocol = DCMF_DEFAULT_GLOBALBARRIER_PROTOCOL;

    status = DCMF_GlobalBarrier_register(&OSPD_GlobalBarrier_protocol, &gb_conf);
    OSPU_ERR_POP(status != DCMF_SUCCESS,
                 "DCMF_GlobalBarrier_register returned with error %d \n",
                 status);

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Non-blocking global broadcast: allocates a request, binds it to the
 * caller's handle (incrementing the handle's active count), and posts a
 * DCMF global bcast of count bytes from root. */
int OSPDI_NbGlobalBcast(int root, int count, void *buffer, OSPD_Handle_t *ospd_handle)
{
    int status = OSP_SUCCESS;
    OSPD_Request_t *ospd_request;
    DCMF_Callback_t done_callback;

    OSPU_FUNC_ENTER();

    ospd_request = OSPDI_Get_request(1);
    OSPU_ERR_POP(status = (ospd_request == NULL),
                 "OSPDI_Get_request returned error \n");

    done_callback.function = OSPDI_Request_done;
    done_callback.clientdata = (void *) ospd_request;

    OSPDI_Set_handle(ospd_request, ospd_handle);
    ospd_handle->active++;

    status = DCMF_GlobalBcast(&OSPD_GlobalBcast_protocol,
                              &(ospd_request->request),
                              done_callback,
                              DCMF_SEQUENTIAL_CONSISTENCY,
                              root,
                              (char *) buffer,
                              count);
    OSPU_ERR_POP(status != DCMF_SUCCESS,
                 "DCMF_GlobalBcast returned with error %d \n", status);

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Public non-blocking put.  Small transfers to self are served by a
 * blocking local memcpy; everything else goes to the device layer. */
int OSP_NbPut(int target, void* src, void* dst, int bytes, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    int my_rank = OSPD_Process_id(OSP_GROUP_WORLD);
    int use_bypass;

    OSPU_FUNC_ENTER();

#   ifdef HAVE_ERROR_CHECKING
#   endif

#   ifdef OSP_TAU_PROFILING
    {
        TAU_TRACE_SENDMSG (OSP_TAU_TAG_NBPUT, target, bytes);
    }
#   endif

    /* Not sure if what is the right strategy for bypass. OSPU_*_memcpy are
     * blocking but the overhead of going into DCMF_Put is likely not worth
     * the savings from said call being non-blocking. This is especially
     * true under heavy load since we have determined that DMA vs. memcpy
     * turns over when the NIC is getting hammered. */
    use_bypass = (target == my_rank)
              && (bytes < ospu_settings.network_bypass_upper_limit_1d);

    if (use_bypass)
    {
        status = OSPU_Put_memcpy(src, dst, bytes);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPU_Put_memcpy returned an error\n");
    }
    else
    {
        status = OSPD_NbPut(target, src, dst, bytes, osp_handle);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPD_NbPut returned an error\n");
    }

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Non-blocking sync over the given group: flushes all outstanding
 * operations, then posts a non-blocking global barrier on the caller's
 * handle.  Only the world group (NULL is shorthand for world) is
 * supported. */
int OSPD_NbSync_group(OSP_group_t* group, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    OSPD_Handle_t *handle;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    /* Guard: reject anything that is not the world group. */
    if (group != OSP_GROUP_WORLD && group != NULL)
    {
        OSPU_ERR_POP(1, "OSPD_NbSync_group not implemented for non-world groups!");
        goto fn_fail;
    }

    handle = (OSPD_Handle_t *) osp_handle;

    /* This has to be replaced with a non-blocking flushall to make it
     * truly non-blocking. */
    status = OSPDI_Flush_all();
    OSPU_ERR_ABORT(status != OSP_SUCCESS,
                   "OSPDI_Flush_all returned with an error");

    status = OSPDI_NbGlobalBarrier(handle);
    OSPU_ERR_ABORT(status != OSP_SUCCESS,
                   "OSPDI_NbGlobalBarrier returned with an error");

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}