int OSPD_Finalize(void) { int status = OSP_SUCCESS; int count = 0; OSPU_FUNC_ENTER(); /* TODO: need to unset "OSP is alive" global variable */ OSPDI_CRITICAL_ENTER(); /*waiting for everyone*/ status = OSPDI_GlobalBarrier(); OSPU_ERR_POP(status != OSP_SUCCESS, "OSPDI_GlobalBarrier returned with an error"); /* Freeing request pool */ OSPDI_Request_pool_finalize(); /* Freeing handle pool */ OSPDI_Handle_pool_finalize(); /* Freeing buffer pool */ OSPDI_Buffer_pool_finalize(); /* Freeing memory region pointers and local memroy region*/ OSPDI_Free(OSPD_Membase_global); OSPDI_Free(OSPD_Memregion_global); /* Freeing conenction active counters */ OSPDI_Free((void *) OSPD_Connection_send_active); OSPDI_Free((void *) OSPD_Connection_put_active); /* Freeing put flush local counters and pointers */ OSPDI_Free(OSPD_Put_Flushcounter_ptr[OSPD_Process_info.my_rank]); OSPDI_Free(OSPD_Put_Flushcounter_ptr); if (ospd_settings.enable_cht) { status = pthread_cancel(OSPDI_CHT_pthread); } OSPDI_CRITICAL_EXIT(); /* NOTE: exit critical section before finalize since CS may not work after DCMF is terminated */ count = DCMF_Messager_finalize(); /* Do not issue this warning if using MPI since in that case we know DCMF will be initialized by MPI before OSP (assuming GA->ARMCI->OSP call path). */ //if(!ospd_settings.mpi_active) //{ // OSPU_WARNING(count == 0, // "DCMF_Messager_finalize has been called more than once."); //} fn_exit: OSPU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Non-blocking vector put-accumulate.
 *
 * Casts osp_handle to the device handle type and forwards every argument to
 * OSPDI_Direct_putaccv() while holding the critical section.
 *
 * Returns the status produced by OSPDI_Direct_putaccv().
 */
int OSPD_NbPutAccV(int target, OSP_iov_t *iov_ar, int ar_len, OSP_datatype_t osp_type, void* scaling, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    OSPD_Handle_t *handle = (OSPD_Handle_t *) osp_handle;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    status = OSPDI_Direct_putaccv(target,
                                  iov_ar,
                                  ar_len,
                                  osp_type,
                                  scaling,
                                  handle);
    OSPU_ERR_POP(status, "Direct putaccv function returned with an error \n");

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Barrier over a process group.
 *
 * Only the world group (OSP_GROUP_WORLD or NULL) is supported; it is served
 * by OSPDI_GlobalBarrier() inside the critical section. Any other group is
 * reported as not implemented.
 *
 * Returns OSP_SUCCESS after a world barrier, or a non-zero status for a
 * non-world group.
 */
int OSPD_Barrier_group(OSP_group_t* group)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        status = OSPDI_GlobalBarrier();
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                       "OSPDI_GlobalBarrier returned with an error");
        goto fn_exit;
    }
    else
    {
        /* Set the status explicitly so the caller does not see OSP_SUCCESS
         * for a barrier that was never performed. */
        status = 1;
        OSPU_ERR_POP(status,
                     "OSPD_Barrier_group not implemented for non-world groups!");
        goto fn_fail;
    }

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Collective release of memory segments.
 *
 * ptr is not used. For the world group (OSP_GROUP_WORLD or NULL) the call
 * performs a global barrier inside the critical section; any other group is
 * reported as not implemented.
 *
 * Returns OSP_SUCCESS after a world barrier, or a non-zero status for a
 * non-world group.
 */
int OSPD_Release_segments(OSP_group_t* group, void *ptr)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    /* This function does nothing because BG does not involve any
     * registration. It has to do a barrier synchronization to ensure
     * everyone is agreeing on the release. */
    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        status = OSPDI_GlobalBarrier();
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                       "OSPDI_GlobalBarrier returned with an error");
        goto fn_exit;
    }
    else
    {
        /* Set the status explicitly so the caller does not see OSP_SUCCESS
         * for a release that was never synchronized. */
        status = 1;
        OSPU_ERR_POP(status,
                     "OSPD_Release_segments not implemented for non-world groups!");
        goto fn_fail;
    }

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Broadcast over a process group.
 *
 * For the world group (OSP_GROUP_WORLD or NULL) root, count and buffer are
 * forwarded to OSPDI_GlobalBcast() inside the critical section; any other
 * group is reported as not implemented.
 *
 * Returns OSP_SUCCESS after a world broadcast, or a non-zero status for a
 * non-world group.
 */
int OSPD_Bcast_group(OSP_group_t* group, int root, int count, void* buffer)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        status = OSPDI_GlobalBcast(root, count, buffer);
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                       "OSPDI_GlobalBcast returned with an error");
        goto fn_exit;
    }
    else
    {
        /* Set the status explicitly so the caller does not see OSP_SUCCESS
         * for a broadcast that was never performed. */
        status = 1;
        OSPU_ERR_POP(status,
                     "OSPD_Bcast_group not implemented for non-world groups!");
        goto fn_fail;
    }

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Non-blocking allreduce over a process group.
 *
 * Not implemented for any group: both the world and the non-world branch
 * only report an error. None of the data arguments or the handle are used.
 *
 * Always returns a non-zero status.
 */
int OSPD_NbAllreduce_group(OSP_group_t* group, int count, OSP_reduce_op_t osp_op, OSP_datatype_t osp_type, void* in, void* out, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    /* Set the status explicitly so the caller does not see OSP_SUCCESS for
     * a reduction that was never performed. */
    status = 1;

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        OSPU_ERR_POP(status, "OSPDI_NbAllreduce has not been implemented \n");
    }
    else
    {
        OSPU_ERR_POP(status,
                     "OSPD_NbAllreduce_group not implemented for non-world groups!");
    }

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Blocking allreduce over a process group.
 *
 * For the world group (OSP_GROUP_WORLD or NULL) count, osp_op, osp_type, in
 * and out are forwarded to OSPDI_GlobalAllreduce() inside the critical
 * section; any other group is reported as not implemented.
 *
 * Returns OSP_SUCCESS after a world allreduce, or a non-zero status for a
 * non-world group.
 */
int OSPD_Allreduce_group(OSP_group_t* group, int count, OSP_reduce_op_t osp_op, OSP_datatype_t osp_type, void* in, void* out)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        status = OSPDI_GlobalAllreduce(count, osp_op, osp_type, in, out);
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                       "OSPDI_GlobalAllreduce returned with an error");
        goto fn_exit;
    }
    else
    {
        /* Set the status explicitly so the caller does not see OSP_SUCCESS
         * for a reduction that was never performed. */
        status = 1;
        OSPU_ERR_POP(status,
                     "OSPD_Allreduce_group not implemented for non-world groups!");
        goto fn_fail;
    }

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Non-blocking get of `bytes` bytes from `src` on rank `target` into local
 * `dst`, tracked by `osp_handle`.
 *
 * The handle's active counter is incremented, a request is obtained with
 * OSPDI_Get_request(1) and attached to the handle, and DCMF_Get() is issued
 * with OSPDI_Request_done as its completion callback. Source and destination
 * are passed to DCMF as displacements from the corresponding entries of
 * OSPD_Membase_global.
 *
 * Returns OSP_SUCCESS, 1 if no request could be obtained, or the status
 * returned by DCMF_Get().
 */
int OSPD_NbGet(int target, void* src, void* dst, int bytes, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    OSPD_Handle_t* ospd_handle = NULL;
    OSPD_Request_t* ospd_request = NULL;
    DCMF_Callback_t callback;
    /* Displacements are computed in size_t; keep that width instead of
     * narrowing to unsigned. */
    size_t src_disp, dst_disp;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    ospd_handle = (OSPD_Handle_t *) osp_handle;
    ospd_handle->active++;

    ospd_request = OSPDI_Get_request(1);
    if (ospd_request == NULL)
    {
        /* No operation was started for this handle, so undo the increment;
         * otherwise the handle would count an operation that can never
         * complete. */
        ospd_handle->active--;
        status = 1;
    }
    OSPU_ERR_POP(status, "OSPDI_Get_request returned error.");

    OSPDI_Set_handle(ospd_request, ospd_handle);

    callback.function = OSPDI_Request_done;
    callback.clientdata = (void *) ospd_request;

    src_disp = (size_t) src - (size_t) OSPD_Membase_global[target];
    dst_disp = (size_t) dst
             - (size_t) OSPD_Membase_global[OSPD_Process_info.my_rank];

    status = DCMF_Get(&OSPD_Generic_get_protocol,
                      &(ospd_request->request),
                      callback,
                      DCMF_RELAXED_CONSISTENCY,
                      target,
                      bytes,
                      &OSPD_Memregion_global[target],
                      &OSPD_Memregion_global[OSPD_Process_info.my_rank],
                      src_disp,
                      dst_disp);
    OSPU_ERR_POP(status, "DCMF_Get returned with an error \n");

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Releases a segment by passing ptr to OSPDI_Free() while holding the
 * critical section.
 *
 * Always returns OSP_SUCCESS.
 */
int OSPD_Free_segment(void *ptr)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();
    OSPDI_Free(ptr);
    OSPDI_CRITICAL_EXIT();

    OSPU_FUNC_EXIT();
    return status;
}
int OSPD_Get(int target, void* src, void* dst, int bytes) { int status = OSP_SUCCESS; DCMF_Request_t request; DCMF_Callback_t callback; volatile int active; unsigned src_disp, dst_disp; OSPU_FUNC_ENTER(); OSPDI_CRITICAL_ENTER(); callback.function = OSPDI_Generic_done; callback.clientdata = (void *) &active; src_disp = (size_t) src - (size_t) OSPD_Membase_global[target]; dst_disp = (size_t) dst - (size_t) OSPD_Membase_global[OSPD_Process_info.my_rank]; active = 1; status = DCMF_Get(&OSPD_Generic_get_protocol, &request, callback, DCMF_RELAXED_CONSISTENCY, target, bytes, &OSPD_Memregion_global[target], &OSPD_Memregion_global[OSPD_Process_info.my_rank], src_disp, dst_disp); OSPU_ERR_POP(status, "DCMF_Get returned with an error"); OSPDI_Conditional_advance(active > 0); fn_exit: OSPDI_CRITICAL_EXIT(); OSPU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Non-blocking sync over a process group.
 *
 * For the world group (OSP_GROUP_WORLD or NULL) the call runs
 * OSPDI_Flush_all() and then starts OSPDI_NbGlobalBarrier() on the given
 * handle, all inside the critical section. Any other group is reported as
 * not implemented.
 *
 * Returns OSP_SUCCESS for the world group, or a non-zero status for a
 * non-world group.
 */
int OSPD_NbSync_group(OSP_group_t* group, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    OSPD_Handle_t *ospd_handle;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        ospd_handle = (OSPD_Handle_t *) osp_handle;

        /* This has to be replaced with a non-blocking flushall to make it
         * truly non-blocking */
        status = OSPDI_Flush_all();
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                       "OSPDI_Flush_all returned with an error");

        status = OSPDI_NbGlobalBarrier(ospd_handle);
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                       "OSPDI_NbGlobalBarrier returned with an error");
        goto fn_exit;
    }
    else
    {
        /* Set the status explicitly so the caller does not see OSP_SUCCESS
         * for a sync that was never started. */
        status = 1;
        OSPU_ERR_POP(status,
                     "OSPD_NbSync_group not implemented for non-world groups!");
        goto fn_fail;
    }

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Blocking vector put-accumulate.
 *
 * Obtains a handle with OSPDI_Get_handle(), forwards the arguments to
 * OSPDI_Direct_putaccv(), calls
 * OSPDI_Conditional_advance(ospd_handle->active > 0), and releases the
 * handle before returning. Everything runs inside the critical section.
 *
 * Returns OSP_SUCCESS, 1 if no handle could be obtained, or the status
 * returned by OSPDI_Direct_putaccv().
 */
int OSPD_PutAccV(int target, OSP_iov_t *iov_ar, int ar_len, OSP_datatype_t osp_type, void* scaling)
{
    int status = OSP_SUCCESS;
    OSPD_Handle_t *ospd_handle = NULL;

    OSPU_FUNC_ENTER();
    OSPDI_CRITICAL_ENTER();

    ospd_handle = OSPDI_Get_handle();
    OSPU_ERR_POP(status = (ospd_handle == NULL),
                 "OSPDI_Get_handle returned NULL in OSPD_PutAccV.\n");

    status = OSPDI_Direct_putaccv(target,
                                  iov_ar,
                                  ar_len,
                                  osp_type,
                                  scaling,
                                  ospd_handle);
    OSPU_ERR_POP(status, "Direct putaccv function returned with an error \n");

    OSPDI_Conditional_advance(ospd_handle->active > 0);

  fn_exit:
    /* The failure path also lands here when no handle was obtained; there
     * is nothing to release in that case. */
    if (ospd_handle != NULL)
    {
        OSPDI_Release_handle(ospd_handle);
    }
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}