Пример #1
0
/*
 * Non-blocking get entry point.
 *
 * When `target` equals the caller's rank in OSP_GROUP_WORLD and `bytes` is
 * below ospu_settings.network_bypass_upper_limit_1d, the request is served
 * by OSPU_Get_memcpy(src, dst, bytes) and `osp_handle` is not used.  Every
 * other request is forwarded unchanged to OSPD_NbGet.
 *
 * Returns the status of whichever routine was invoked.
 */
int OSP_NbGet(int target, void* src, void* dst, int bytes, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    int self_rank = OSPD_Process_id(OSP_GROUP_WORLD);
    int serve_locally;

    OSPU_FUNC_ENTER();

#   ifdef HAVE_ERROR_CHECKING
#   endif

#   ifdef OSP_TAU_PROFILING
    {
        TAU_TRACE_SENDMSG (OSP_TAU_TAG_NBGET, target, bytes);
    }
#   endif

    /* Self-targeted transfers under the 1d threshold skip the device layer. */
    serve_locally = (target == self_rank)
                    && (bytes < ospu_settings.network_bypass_upper_limit_1d);

    if (!serve_locally)
    {
        status = OSPD_NbGet(target, src, dst, bytes, osp_handle);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPD_NbGet returned an error\n");
    }
    else
    {
        status = OSPU_Get_memcpy(src, dst, bytes);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPU_Get_memcpy returned an error\n");
    }

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #2
0
/*
 * Non-blocking allreduce over a group -- NOT IMPLEMENTED.
 *
 * Both the world-group path and the non-world path only report an error.
 * None of `count`, `osp_op`, `osp_type`, `in`, `out` or `osp_handle` is
 * used; they are kept for interface compatibility.
 *
 * Returns a non-zero status on every call, so that callers do not mistake
 * the missing implementation for a completed reduction.
 */
int OSPD_NbAllreduce_group(OSP_group_t* group,
                          int count,
                          OSP_reduce_op_t osp_op,
                          OSP_datatype_t osp_type,
                          void* in,
                          void* out,
                          OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();

    /* Generic non-zero failure code: the same value the
     * `status = (ptr == NULL)` idiom produces in the other routines. */
    status = 1;

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        OSPU_ERR_POP(1,
                    "OSPDI_NbAllreduce has not been implemented \n");
    }
    else
    {
        OSPU_ERR_POP(1,
                    "OSPD_NbAllreduce_group not implemented for non-world groups!");
    }

    fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;

}
Пример #3
0
/*
 * Sets up everything the global allreduce needs:
 *   1. registers the GI barrier and the lockbox barrier protocols,
 *   2. allocates `allreduce_ranklist` and fills it with 0..num_ranks-1,
 *   3. initializes `geometry` over that rank list with the two barriers,
 *   4. registers the torus binomial allreduce protocol.
 *
 * Each step is checked; the first failure is reported and its status is
 * returned without running the remaining steps.
 *
 * Returns OSP_SUCCESS / DCMF_SUCCESS on success, otherwise the failing
 * call's status.
 */
int OSPDI_GlobalAllreduce_initialize()
{
    int i,status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    barrier_conf.protocol = DCMF_GI_BARRIER_PROTOCOL;
    barrier_conf.cb_geometry = getGeometry;
    status = DCMF_Barrier_register(&OSPD_Barrier_protocol,
                                   &barrier_conf);
    OSPU_ERR_POP(status != DCMF_SUCCESS,
                "DCMF_Barrier_register returned with error %d \n", status);

    barrier_conf.protocol = DCMF_LOCKBOX_BARRIER_PROTOCOL;
    barrier_conf.cb_geometry = getGeometry;
    status = DCMF_Barrier_register(&OSPD_Localbarrier_protocol,
                                   &barrier_conf);
    OSPU_ERR_POP(status != DCMF_SUCCESS,
                "DCMF_Barrier_register returned with error %d \n", status);

    /* The rank list is never freed by this module. */
    status = OSPDI_Malloc((void **) &allreduce_ranklist, OSPD_Process_info.num_ranks * sizeof(unsigned));
    OSPU_ERR_POP(status != 0,
                "OSPDI_Malloc returned with error %d \n", status);

    /* Identity mapping: the geometry spans every rank. */
    for(i=0; i<OSPD_Process_info.num_ranks; i++)
        allreduce_ranklist[i] = i;

    barrier_ptr = &OSPD_Barrier_protocol;
    localbarrier_ptr  = &OSPD_Localbarrier_protocol;
    status = DCMF_Geometry_initialize(&geometry,
                                      0,
                                      allreduce_ranklist,
                                      OSPD_Process_info.num_ranks,
                                      &barrier_ptr,
                                      1,
                                      &localbarrier_ptr,
                                      1,
                                      &crequest,
                                      0,
                                      1);
    OSPU_ERR_POP(status != DCMF_SUCCESS,
                "DCMF_Geometry_initialize returned with error %d \n", status);

    allreduce_conf.protocol = DCMF_TORUS_BINOMIAL_ALLREDUCE_PROTOCOL;
    allreduce_conf.cb_geometry = getGeometry;
    allreduce_conf.reuse_storage = 1;
    status = DCMF_Allreduce_register(&OSPD_GlobalAllreduce_protocol,
                                     &allreduce_conf);
    OSPU_ERR_POP(status != DCMF_SUCCESS,
                "DCMF_Allreduce_register returned with error %d \n", status);

    fn_exit:
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;
}
Пример #4
0
/*
 * Walks a strided region recursively and issues one OSPD_NbPutAcc per
 * innermost chunk.
 *
 * At level 0 a single OSPD_NbPutAcc is issued with block_sizes[0] as its
 * size argument.  At level k > 0 the routine iterates block_sizes[k] times,
 * offsetting the source by src_stride_ar[k-1] and the target by
 * trg_stride_ar[k-1] per iteration, and recurses with level k-1.
 *
 * Returns OSP_SUCCESS, or the first non-success status encountered.
 */
int OSPI_Recursive_PutAcc(int target,
                         int stride_level,
                         int *block_sizes,
                         void* source_ptr,
                         int *src_stride_ar,
                         void* target_ptr,
                         int *trg_stride_ar,
                         OSP_datatype_t osp_type,
                         void* scaling,
                         OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    int blk;

    OSPU_FUNC_ENTER();

    /* Base case: one contiguous chunk. */
    if (stride_level <= 0)
    {
        status = OSPD_NbPutAcc(target, source_ptr, target_ptr,
                              block_sizes[0], osp_type, scaling, osp_handle);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPD_NbPutAcc returned with an error \n");
        goto fn_exit;
    }

    /* Recursive case: peel off the outermost dimension. */
    for (blk = 0; blk < block_sizes[stride_level]; blk++)
    {
        void *sub_source = (void *) ((size_t) source_ptr + blk * src_stride_ar[stride_level - 1]);
        void *sub_target = (void *) ((size_t) target_ptr + blk * trg_stride_ar[stride_level - 1]);

        status = OSPI_Recursive_PutAcc(target, stride_level - 1, block_sizes,
                                      sub_source, src_stride_ar,
                                      sub_target, trg_stride_ar,
                                      osp_type, scaling, osp_handle);
        OSPU_ERR_POP(status != OSP_SUCCESS,
                    "OSPI_Recursive_PutAcc returned error in OSPI_Recursive_PutAcc.\n");
    }

    fn_exit:
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;
}
Пример #5
0
/*
 * Blocking allreduce over a group.
 *
 * Only the world group (OSP_GROUP_WORLD or NULL) is supported; it is served
 * by OSPDI_GlobalAllreduce inside the device critical section.  Any other
 * group is reported as unimplemented.
 *
 * Returns OSP_SUCCESS on success and a non-zero status when the group is
 * not supported.
 */
int OSPD_Allreduce_group(OSP_group_t* group,
                        int count,
                        OSP_reduce_op_t osp_op,
                        OSP_datatype_t osp_type,
                        void* in,
                        void* out)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        status = OSPDI_GlobalAllreduce(count, osp_op, osp_type, in, out);
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                      "OSPDI_GlobalAllreduce returned with an error");
    }
    else
    {
        /* Make the failure visible to the caller, not only in the log.
         * Generic non-zero code, as produced by `status = (ptr == NULL)`. */
        status = 1;
        OSPU_ERR_POP(1,
                    "OSPD_Allreduce_group not implemented for non-world groups!");
        goto fn_fail;
    }

    fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;

}
Пример #6
0
/*
 * Tears down the device layer.
 *
 * Inside the critical section: runs a global barrier, finalizes the
 * request, handle and buffer pools, frees the global memory-region tables,
 * the connection counters and the put-flush counters, and cancels the CHT
 * thread when ospd_settings.enable_cht is set.  After leaving the critical
 * section it calls DCMF_Messager_finalize.
 *
 * If the barrier fails, the critical section is released and the barrier's
 * status is returned without freeing anything.
 *
 * Returns OSP_SUCCESS, the barrier's error status, or the result of
 * pthread_cancel when the CHT thread is cancelled.
 */
int OSPD_Finalize(void)
{
    int status = OSP_SUCCESS;
    int count = 0;

    OSPU_FUNC_ENTER();

    /* TODO: need to unset "OSP is alive" global variable */

    OSPDI_CRITICAL_ENTER();

    /* Wait for everyone. */
    status = OSPDI_GlobalBarrier();
    OSPU_ERR_POP(status != OSP_SUCCESS, 
              "OSPDI_GlobalBarrier returned with an error");

    /* Freeing request pool */
    OSPDI_Request_pool_finalize();

    /* Freeing handle pool */
    OSPDI_Handle_pool_finalize();

    /* Freeing buffer pool */
    OSPDI_Buffer_pool_finalize();

    /* Freeing memory region pointers and local memory region */
    OSPDI_Free(OSPD_Membase_global);
    OSPDI_Free(OSPD_Memregion_global);

    /* Freeing connection active counters */
    OSPDI_Free((void *) OSPD_Connection_send_active);
    OSPDI_Free((void *) OSPD_Connection_put_active);

    /* Freeing put flush local counters and pointers */
    OSPDI_Free(OSPD_Put_Flushcounter_ptr[OSPD_Process_info.my_rank]);
    OSPDI_Free(OSPD_Put_Flushcounter_ptr);

    if (ospd_settings.enable_cht)
    {
        status = pthread_cancel(OSPDI_CHT_pthread);
    }

    OSPDI_CRITICAL_EXIT();

    /* NOTE: exit critical section before finalize since CS may not work after DCMF is terminated */

    count = DCMF_Messager_finalize();
    /* Do not issue this warning if using MPI since in that case we know DCMF
       will be initialized by MPI before OSP (assuming GA->ARMCI->OSP call path). */
    //if(!ospd_settings.mpi_active)
    //{
    //    OSPU_WARNING(count == 0,
    //                "DCMF_Messager_finalize has been called more than once.");
    //}

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    /* The only failure jump originates inside the critical section, so it
     * must be released here; the success path has already released it. */
    OSPDI_CRITICAL_EXIT();
    goto fn_exit;
}
Пример #7
0
/*
 * Starts a global barrier without waiting for it to finish.
 *
 * A request is taken from the pool, attached to `ospd_handle`, and the
 * handle's `active` count is incremented.  DCMF_GlobalBarrier is then
 * issued with OSPDI_Request_done as completion callback and the request as
 * its client data.
 *
 * Returns OSP_SUCCESS / DCMF_SUCCESS on success, or 1 when no request could
 * be obtained.  A failing DCMF_GlobalBarrier is handled by OSPU_ERR_ABORT.
 */
int OSPDI_NbGlobalBarrier(OSPD_Handle_t *ospd_handle)
{

    int status = OSP_SUCCESS;
    OSPD_Request_t *ospd_request;
    DCMF_Callback_t done_callback;

    OSPU_FUNC_ENTER();

    ospd_request = OSPDI_Get_request(1);
    OSPU_ERR_POP(status = (ospd_request == NULL),
                "OSPDI_Get_request returned error \n");
    OSPDI_Set_handle(ospd_request, ospd_handle);

    /* Counted only once a request is actually held. */
    ospd_handle->active++;

    done_callback.function = OSPDI_Request_done;
    done_callback.clientdata = (void *) ospd_request;

    status = DCMF_GlobalBarrier(&OSPD_GlobalBarrier_protocol,
                                &(ospd_request->request),
                                done_callback);
    OSPU_ERR_ABORT(status != DCMF_SUCCESS,
                  "DCMF_GlobalBarrier returned with an error");

    fn_exit:
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;

}
Пример #8
0
/*
 * Blocking broadcast of `count` units from `buffer` on `root` over a group.
 *
 * Only the world group (OSP_GROUP_WORLD or NULL) is supported; it is served
 * by OSPDI_GlobalBcast inside the device critical section.  Any other group
 * is reported as unimplemented.
 *
 * Returns OSP_SUCCESS on success and a non-zero status when the group is
 * not supported.
 */
int OSPD_Bcast_group(OSP_group_t* group,
                    int root,
                    int count,
                    void* buffer)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        status = OSPDI_GlobalBcast(root, count, buffer);
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                      "OSPDI_GlobalBcast returned with an error");
    }
    else
    {
        /* Make the failure visible to the caller, not only in the log.
         * Generic non-zero code, as produced by `status = (ptr == NULL)`. */
        status = 1;
        OSPU_ERR_POP(1,
                    "OSPD_Bcast_group not implemented for non-world groups!");
        goto fn_fail;
    }

    fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;

}
Пример #9
0
/*
 * Collective release of a memory segment.
 *
 * No deregistration is performed (per the original note, BG involves no
 * registration), so `ptr` is not used.  For the world group
 * (OSP_GROUP_WORLD or NULL) the routine only runs OSPDI_GlobalBarrier so
 * that all processes agree on the release.  Any other group is reported as
 * unimplemented.
 *
 * Returns OSP_SUCCESS on success and a non-zero status when the group is
 * not supported.
 */
int OSPD_Release_segments(OSP_group_t* group, void *ptr)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        status = OSPDI_GlobalBarrier();
        OSPU_ERR_ABORT(status != OSP_SUCCESS, "DCMF_GlobalBarrier returned with an error");
    }
    else
    {
        /* Make the failure visible to the caller, not only in the log.
         * Generic non-zero code, as produced by `status = (ptr == NULL)`. */
        status = 1;
        OSPU_ERR_POP(1, "OSPD_Release_segments not implemented for non-world groups!");
        goto fn_fail;
    }

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #10
0
/*
 * Blocking barrier over a group.
 *
 * Only the world group (OSP_GROUP_WORLD or NULL) is supported; it is served
 * by OSPDI_GlobalBarrier inside the device critical section.  Any other
 * group is reported as unimplemented.
 *
 * Returns OSP_SUCCESS on success and a non-zero status when the group is
 * not supported.
 */
int OSPD_Barrier_group(OSP_group_t* group)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        status = OSPDI_GlobalBarrier();
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                      "DCMF_GlobalBarrier returned with an error");
    }
    else
    {
        /* Make the failure visible to the caller, not only in the log.
         * Generic non-zero code, as produced by `status = (ptr == NULL)`. */
        status = 1;
        OSPU_ERR_POP(1,
                    "OSPD_Barrier_group not implemented for non-world groups!");
        goto fn_fail;
    }

    fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;

}
Пример #11
0
/*
 * Non-blocking vector put-accumulate on the device layer.
 *
 * Inside the device critical section, reinterprets `osp_handle` as an
 * OSPD_Handle_t pointer and hands the whole request to
 * OSPDI_Direct_putaccv.  The routine does not wait for completion.
 *
 * Returns the status of OSPDI_Direct_putaccv.
 */
int OSPD_NbPutAccV(int target,
                  OSP_iov_t *iov_ar,
                  int ar_len,
                  OSP_datatype_t osp_type,
                  void* scaling,
                  OSP_handle_t osp_handle)
{
    OSPD_Handle_t *device_handle;
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();

    device_handle = (OSPD_Handle_t *) osp_handle;

    status = OSPDI_Direct_putaccv(target, iov_ar, ar_len,
                                 osp_type, scaling, device_handle);
    OSPU_ERR_POP(status, "Direct putaccv function returned with an error \n");

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #12
0
/*
 * Non-blocking get on the device layer.
 *
 * Takes a request from the pool, attaches it to the caller's handle and
 * issues DCMF_Get with OSPDI_Request_done as completion callback.  `src`
 * and `dst` are converted to displacements relative to
 * OSPD_Membase_global[target] and OSPD_Membase_global[my_rank]
 * respectively.
 *
 * The handle's `active` count is incremented only after a request has been
 * obtained, so a failed OSPDI_Get_request does not leave the handle
 * counting an operation that was never issued.
 *
 * Returns OSP_SUCCESS on success, 1 when no request is available, or the
 * non-zero status of DCMF_Get.
 */
int OSPD_NbGet(int target, void* src, void* dst, int bytes, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    OSPD_Handle_t* ospd_handle = NULL;
    OSPD_Request_t* ospd_request = NULL;
    DCMF_Callback_t callback;
    unsigned src_disp, dst_disp;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();

    ospd_handle = (OSPD_Handle_t *) osp_handle;

    ospd_request = OSPDI_Get_request(1);
    OSPU_ERR_POP(status = (ospd_request == NULL), "OSPDI_Get_request returned error.");
    OSPDI_Set_handle(ospd_request, ospd_handle); 

    /* Same order as the other non-blocking device routines:
     * request first, then count the outstanding operation. */
    ospd_handle->active++;

    callback.function = OSPDI_Request_done;
    callback.clientdata = (void *) ospd_request;

    src_disp = (size_t) src - (size_t) OSPD_Membase_global[target];
    dst_disp = (size_t) dst - (size_t) OSPD_Membase_global[OSPD_Process_info.my_rank];

    status = DCMF_Get(&OSPD_Generic_get_protocol,
                      &(ospd_request->request),
                      callback,
                      DCMF_RELAXED_CONSISTENCY,
                      target,
                      bytes,
                      &OSPD_Memregion_global[target],
                      &OSPD_Memregion_global[OSPD_Process_info.my_rank],
                      src_disp,
                      dst_disp);
    OSPU_ERR_POP(status, "DCMF_Get returned with an error \n");

  fn_exit: 
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail: 
    goto fn_exit;
}
Пример #13
0
/*
 * Non-blocking vector put-accumulate entry point.
 *
 * When `target` is the caller's own rank in OSP_GROUP_WORLD and
 * ospu_settings.network_bypass is set, the accumulate is performed by
 * OSPU_AccV_memcpy and `osp_handle` is not used.  Otherwise the request is
 * forwarded unchanged to OSPD_NbPutAccV.
 *
 * Returns the status of whichever routine was invoked.
 */
int OSP_NbPutAccV(int target,
                 OSP_iov_t *iov_ar,
                 int ar_len,
                 OSP_datatype_t osp_type,
                 void* scaling,
                 OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    int self_rank = OSPD_Process_id(OSP_GROUP_WORLD);
    int serve_locally;

    OSPU_FUNC_ENTER();

#   ifdef HAVE_ERROR_CHECKING
#   endif

#   ifdef OSP_TAU_PROFILING
    {
        /* NOTE(review): confirm the field names ptr_array_len / bytes
         * against the current OSP_iov_t definition. */
        int i, total_bytes = 0;
        for (i = 0; i < ar_len; i++)
            total_bytes += iov_ar[i].ptr_array_len * iov_ar[i].bytes;
        TAU_TRACE_SENDMSG (OSP_TAU_TAG_NBPUTACCV, target, total_bytes);
    }
#   endif

    /* For accumulate the local path is taken whenever bypass is enabled;
     * no size threshold is consulted. */
    serve_locally = (target == self_rank) && ospu_settings.network_bypass;

    if (!serve_locally)
    {
        status = OSPD_NbPutAccV(target, iov_ar, ar_len,
                               osp_type, scaling, osp_handle);
        OSPU_ERR_POP(status, "OSPD_NbPutAccV returned error\n");
    }
    else
    {
        status = OSPU_AccV_memcpy(iov_ar, ar_len, osp_type, scaling);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPU_AccV_memcpy returned an error\n");
    }

    fn_exit:
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;
}
Пример #14
0
/*
 * Non-blocking put entry point.
 *
 * When `target` equals the caller's rank in OSP_GROUP_WORLD and `bytes` is
 * below ospu_settings.network_bypass_upper_limit_1d, the request is served
 * by OSPU_Put_memcpy(src, dst, bytes) and `osp_handle` is not used.  Every
 * other request is forwarded unchanged to OSPD_NbPut.
 *
 * Returns the status of whichever routine was invoked.
 */
int OSP_NbPut(int target, void* src, void* dst, int bytes, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    int self_rank = OSPD_Process_id(OSP_GROUP_WORLD);
    int serve_locally;

    OSPU_FUNC_ENTER();

#   ifdef HAVE_ERROR_CHECKING
#   endif

#   ifdef OSP_TAU_PROFILING
    {
        TAU_TRACE_SENDMSG (OSP_TAU_TAG_NBPUT, target, bytes);
    }
#   endif

    /* Open question from the original author: the right bypass strategy is
     * unclear.  The OSPU_*_memcpy routines block, but the overhead of
     * entering DCMF_Put is probably not repaid by that call being
     * non-blocking -- especially under heavy load, where the DMA-vs-memcpy
     * crossover was observed to shift while the NIC is saturated. */
    serve_locally = (target == self_rank)
                    && (bytes < ospu_settings.network_bypass_upper_limit_1d);

    if (!serve_locally)
    {
        status = OSPD_NbPut(target, src, dst, bytes, osp_handle);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPD_NbPut returned an error\n");
    }
    else
    {
        status = OSPU_Put_memcpy(src, dst, bytes);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPU_Put_memcpy returned an error\n");
    }

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #15
0
/*
 * Starts a global broadcast without waiting for it to finish.
 *
 * A request is taken from the pool, attached to `ospd_handle`, and the
 * handle's `active` count is incremented.  DCMF_GlobalBcast is then issued
 * for `count` units of `buffer` rooted at `root`, with OSPDI_Request_done
 * as completion callback and the request as its client data.
 *
 * Returns DCMF_GlobalBcast's status, or 1 when no request is available.
 */
int OSPDI_NbGlobalBcast(int root,
                       int count,
                       void *buffer,
                       OSPD_Handle_t *ospd_handle)
{
    DCMF_Callback_t on_done;
    OSPD_Request_t *req;
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    req = OSPDI_Get_request(1);
    OSPU_ERR_POP(status = (req == NULL),
                "OSPDI_Get_request returned error \n");

    /* Completion is reported through the pooled request. */
    on_done.clientdata = (void *) req;
    on_done.function = OSPDI_Request_done;

    OSPDI_Set_handle(req, ospd_handle);
    ospd_handle->active++;

    status = DCMF_GlobalBcast(&OSPD_GlobalBcast_protocol, &(req->request), on_done,
                              DCMF_SEQUENTIAL_CONSISTENCY, root, (char *) buffer, count);
    OSPU_ERR_POP(status != DCMF_SUCCESS,
                "DCMF_GlobalBcast returned with error %d \n",
                status);

    fn_exit:
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;

}
Пример #16
0
/*
 * Blocking vector put-accumulate on the device layer.
 *
 * Obtains a temporary handle, issues the accumulate through
 * OSPDI_Direct_putaccv, then advances until the handle's `active` count
 * drops to zero.  The handle is released on exit, but only if one was
 * actually obtained.
 *
 * Returns OSP_SUCCESS on success, 1 when no handle is available, or the
 * non-zero status of OSPDI_Direct_putaccv.
 */
int OSPD_PutAccV(int target,
                OSP_iov_t *iov_ar,
                int ar_len,
                OSP_datatype_t osp_type,
                void* scaling)
{
    int status = OSP_SUCCESS;
    OSPD_Handle_t *ospd_handle = NULL;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();

    ospd_handle = OSPDI_Get_handle();
    OSPU_ERR_POP(status = (ospd_handle == NULL),
                "OSPDI_Get_handle returned NULL in OSPD_PutAccV.\n");

    status = OSPDI_Direct_putaccv(target,
                                 iov_ar,
                                 ar_len,
                                 osp_type,
                                 scaling,
                                 ospd_handle);
    OSPU_ERR_POP(status, "Direct putaccv function returned with an error \n");

    /* Block until every operation attached to the handle has completed. */
    OSPDI_Conditional_advance(ospd_handle->active > 0);

  fn_exit:
    /* The failure path can arrive here without a handle. */
    if (ospd_handle != NULL)
    {
        OSPDI_Release_handle(ospd_handle);
    }
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail: 
    goto fn_exit;
}
Пример #17
0
/*
 * Registers OSPD_Generic_get_protocol with DCMF, selecting the default get
 * protocol on the torus network.
 *
 * Returns the status of DCMF_Get_register.
 */
int OSPDI_Get_initialize()
{
    DCMF_Get_Configuration_t get_config;
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    get_config.network = DCMF_TORUS_NETWORK;
    get_config.protocol = DCMF_DEFAULT_GET_PROTOCOL;

    status = DCMF_Get_register(&OSPD_Generic_get_protocol, &get_config);
    OSPU_ERR_POP(status != DCMF_SUCCESS, "DCMF_Get_register failed");

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #18
0
/*
 * Blocking get on the device layer.
 *
 * Issues DCMF_Get with a stack-allocated request and a completion flag that
 * OSPDI_Generic_done receives as client data, then advances until the flag
 * is no longer positive.  `src` and `dst` are converted to displacements
 * relative to OSPD_Membase_global[target] and
 * OSPD_Membase_global[my_rank] respectively.
 *
 * Returns the status of DCMF_Get.
 */
int OSPD_Get(int target, void* src, void* dst, int bytes)
{
    int status = OSP_SUCCESS;
    DCMF_Request_t get_request;
    DCMF_Callback_t on_done;
    volatile int pending;
    unsigned remote_disp, local_disp;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();

    /* One operation outstanding until the callback reports completion. */
    pending = 1;
    on_done.function = OSPDI_Generic_done;
    on_done.clientdata = (void *) &pending;

    remote_disp = (size_t) src - (size_t) OSPD_Membase_global[target];
    local_disp = (size_t) dst - (size_t) OSPD_Membase_global[OSPD_Process_info.my_rank];

    status = DCMF_Get(&OSPD_Generic_get_protocol, &get_request, on_done,
                      DCMF_RELAXED_CONSISTENCY, target, bytes,
                      &OSPD_Memregion_global[target],
                      &OSPD_Memregion_global[OSPD_Process_info.my_rank],
                      remote_disp, local_disp);
    OSPU_ERR_POP(status, "DCMF_Get returned with an error");

    OSPDI_Conditional_advance(pending > 0);

  fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #19
0
/*
 * Registers OSPD_GlobalBarrier_protocol with DCMF using the default global
 * barrier protocol.
 *
 * Returns the status of DCMF_GlobalBarrier_register.
 */
int OSPDI_GlobalBarrier_initialize()
{
    DCMF_GlobalBarrier_Configuration_t barrier_config;
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    barrier_config.protocol = DCMF_DEFAULT_GLOBALBARRIER_PROTOCOL;

    status = DCMF_GlobalBarrier_register(&OSPD_GlobalBarrier_protocol, &barrier_config);
    OSPU_ERR_POP(status != DCMF_SUCCESS,
                "DCMF_GlobalBarrier_register returned with error %d \n", status);

    fn_exit:
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;
}
Пример #20
0
/*
 * Group synchronization that returns before the barrier completes.
 *
 * Only the world group (OSP_GROUP_WORLD or NULL) is supported.  For it the
 * routine runs OSPDI_Flush_all, which is a blocking call, and then starts a
 * barrier with OSPDI_NbGlobalBarrier on the caller's handle.  Any other
 * group is reported as unimplemented.
 *
 * Returns OSP_SUCCESS on success and a non-zero status when the group is
 * not supported.
 */
int OSPD_NbSync_group(OSP_group_t* group, OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    OSPD_Handle_t *ospd_handle;

    OSPU_FUNC_ENTER();

    OSPDI_CRITICAL_ENTER();

    if (group == OSP_GROUP_WORLD || group == NULL)
    {
        ospd_handle = (OSPD_Handle_t *) osp_handle;

        /* This has to be replaced with a non-blocking flush-all to make the
         * routine truly non-blocking. */
        status = OSPDI_Flush_all();
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                      "OSPDI_Flush_all returned with an error");

        status = OSPDI_NbGlobalBarrier(ospd_handle);
        OSPU_ERR_ABORT(status != OSP_SUCCESS,
                      "OSPDI_NbGlobalBarrier returned with an error");
    }
    else
    {
        /* Make the failure visible to the caller, not only in the log.
         * Generic non-zero code, as produced by `status = (ptr == NULL)`. */
        status = 1;
        OSPU_ERR_POP(1, "OSPD_NbSync_group not implemented for non-world groups!");
        goto fn_fail;
    }

    fn_exit:
    OSPDI_CRITICAL_EXIT();
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;

}
Пример #21
0
/*
 * Read-modify-write entry point.
 *
 * Thin wrapper: after the optional TAU trace, all arguments are passed
 * unchanged to OSPD_Rmw.
 *
 * Returns the status of OSPD_Rmw.
 */
int OSP_Rmw(int target,
           void* source_ptr_in,
           void* source_ptr_out,
           void* target_ptr,
           int bytes,
           OSP_atomic_op_t op,
           OSP_datatype_t osp_type)
{
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

#   ifdef HAVE_ERROR_CHECKING
#   endif

#   ifdef OSP_TAU_PROFILING
    {
      TAU_TRACE_SENDMSG (OSP_TAU_TAG_RMW, target, bytes);
    }
#   endif

    /* No local shortcut here: the device layer handles every target. */
    status = OSPD_Rmw(target, source_ptr_in, source_ptr_out, target_ptr,
                     bytes, op, osp_type);
    OSPU_ERR_POP(status!=OSP_SUCCESS, "OSPD_Rmw returned an error\n");

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #22
0
/*
 * Blocking global broadcast of `count` units of `buffer` rooted at `root`.
 *
 * Issues DCMF_GlobalBcast with a stack-allocated request and a completion
 * counter that OSPDI_Generic_done receives as client data, then advances
 * until the counter is no longer positive.
 *
 * Returns the status of DCMF_GlobalBcast.
 */
int OSPDI_GlobalBcast(int root,
                     int count,
                     void *buffer)
{
    volatile unsigned outstanding = 0;
    DCMF_Callback_t on_done;
    DCMF_Request_t bcast_request;
    int status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    /* One operation outstanding until the callback reports completion. */
    outstanding += 1;
    on_done.clientdata = (void *) &outstanding;
    on_done.function = OSPDI_Generic_done;

    status = DCMF_GlobalBcast(&OSPD_GlobalBcast_protocol, &bcast_request, on_done,
                              DCMF_SEQUENTIAL_CONSISTENCY, root, (char *) buffer, count);
    OSPU_ERR_POP(status != DCMF_SUCCESS,
                "DCMF_GlobalBcast returned with error %d \n",
                status);

    OSPDI_Conditional_advance(outstanding > 0);

    fn_exit:
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;

}
Пример #23
0
/*
 * Local strided copy from `source_ptr` to `target_ptr`.
 *
 * block_sizes[0] is the byte length of each contiguous chunk;
 * block_sizes[k] for 1 <= k <= stride_level is the chunk count along
 * dimension k.  src_stride_ar[k-1] / trg_stride_ar[k-1] are the byte
 * strides along dimension k on the source / target side.
 *
 * A working copy of block_sizes is used as a per-dimension countdown; it is
 * released on every exit path.  With stride_level <= 0 the region is a
 * single contiguous chunk and is copied directly, without touching the
 * countdown array (which would have no element for dimension 1).
 *
 * Returns OSP_SUCCESS, or 1 when the working array cannot be allocated.
 */
int OSPU_GetS_local(int stride_level,
                    int *block_sizes,
                    void* source_ptr,
                    int *src_stride_ar,
                    void* target_ptr,
                    int *trg_stride_ar)
{
    int status = OSP_SUCCESS;
    int chunk_count = 1;
    int *block_sizes_w = NULL;
    int i, y;

    OSPU_FUNC_ENTER();

    /* Contiguous case: nothing to iterate over. */
    if (stride_level <= 0)
    {
        memcpy(target_ptr, source_ptr, block_sizes[0]);
        goto fn_exit;
    }

    block_sizes_w = malloc(sizeof(int) * (stride_level + 1));
    OSPU_ERR_POP((status = (NULL == block_sizes_w)),
                "malloc failed in OSPU_GetS_local");

    memcpy(block_sizes_w, block_sizes, sizeof(int) * (stride_level + 1));

    /* Total number of contiguous chunks across all strided dimensions. */
    for (i = 1; i <= stride_level; i++)
        chunk_count = block_sizes[i] * chunk_count;

    for (i = 0; i < chunk_count; i++)
    {
        memcpy(target_ptr, source_ptr, block_sizes[0]);

        block_sizes_w[1]--;
        if (block_sizes_w[1] == 0)
        {
            /* Dimension 1 is exhausted: find the lowest dimension that
             * still has chunks left. */
            y = 1;
            while (block_sizes_w[y] == 0)
            {
                if (y == stride_level)
                {
                    /* Every dimension is exhausted: this must be the last
                     * chunk.  Leave through the common exit so the working
                     * array is freed. */
                    OSPU_ASSERT(i == chunk_count - 1, status);
                    goto fn_exit;
                }
                y++;
            }
            block_sizes_w[y]--;

            /* The strides done on lower dimensions should be subtracted as
               these are included in the stride along the current dimension. */
            source_ptr = (void *) ((size_t) source_ptr 
                    + src_stride_ar[y - 1]
                                    - (block_sizes[y-1] - 1) * src_stride_ar[y-2]);
            target_ptr = (void *) ((size_t) target_ptr 
                    + trg_stride_ar[y - 1]
                                    - (block_sizes[y-1] - 1) * trg_stride_ar[y-2]);

            /* Reset the countdown of every lower dimension. */
            y--;
            while (y >= 1)
            {
                block_sizes_w[y] = block_sizes[y];
                y--;
            }
        }
        else
        {
            source_ptr = (void *) ((size_t) source_ptr + src_stride_ar[0]);
            target_ptr = (void *) ((size_t) target_ptr + trg_stride_ar[0]);
        }
    }

    fn_exit:
    free(block_sizes_w); /* free(NULL) is a no-op */
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;
}
Пример #24
0
/*
 * Non-blocking strided put-accumulate entry point.
 *
 * When `target` is the caller's own rank in OSP_GROUP_WORLD and
 * ospu_settings.network_bypass is set, the accumulate is performed by
 * OSPU_AccS_local and `osp_handle` is not used.  Otherwise the strided
 * region is walked by OSPI_Recursive_PutAcc.
 *
 * Returns the status of whichever routine was invoked.
 */
int OSP_NbPutAccS(int target,
                 int stride_level,
                 int *block_sizes,
                 void* source_ptr,
                 int *src_stride_ar,
                 void* target_ptr,
                 int *trg_stride_ar,
                 OSP_datatype_t osp_type,
                 void* scaling,
                 OSP_handle_t osp_handle)
{
    int status = OSP_SUCCESS;
    int my_rank = OSPD_Process_id(OSP_GROUP_WORLD);

    OSPU_FUNC_ENTER();

#   ifdef HAVE_ERROR_CHECKING
#   endif

#   ifdef OSP_TAU_PROFILING
    {
      /* Traced size is the product of block_sizes[0..stride_level], using
       * this function's own parameter names. */
      int i, total_bytes = 1;
      for (i = 0; i <= stride_level; i++) total_bytes *= block_sizes[i];
      TAU_TRACE_SENDMSG (OSP_TAU_TAG_NBPUTACCS, target, total_bytes);
    }
#   endif

    /* Bypass is ALWAYS better for accumulate; we do not test against threshold. */
    if (target == my_rank && ospu_settings.network_bypass)
    {
        status = OSPU_AccS_local(stride_level,
                                 block_sizes,
                                 source_ptr,
                                 src_stride_ar,
                                 target_ptr,
                                 trg_stride_ar,
                                 osp_type,
                                 scaling);
        OSPU_ERR_POP(status != OSP_SUCCESS, "OSPU_AccS_local returned an error\n");
    }
    else
    {
        status = OSPI_Recursive_PutAcc(target,
                                      stride_level,
                                      block_sizes,
                                      source_ptr,
                                      src_stride_ar,
                                      target_ptr,
                                      trg_stride_ar,
                                      osp_type,
                                      scaling,
                                      osp_handle);
        OSPU_ERR_POP(status!=OSP_SUCCESS, "OSPI_Recursive_PutAcc returned error\n");
    }

    fn_exit:
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;
}
Пример #25
0
/*
 * Blocking global allreduce of `count` elements from `in` into `out`.
 *
 * `osp_op` is mapped to a DCMF reduction operator and `osp_type` to a DCMF
 * datatype.  For OSP_MAXABS / OSP_MINABS on signed and floating types the
 * input is first copied into a temporary buffer through OSPDI_ABS and the
 * reduction (DCMF_MAX / DCMF_MIN) runs on that copy; unsigned types are
 * reduced in place.  The temporary buffer is freed on every exit path.
 *
 * An unsupported operator or datatype is reported and returned as a
 * non-zero status before any DCMF call is made, so the reduction never runs
 * with an unset operator or datatype.  A failing DCMF_Allreduce is reported
 * without waiting for a completion callback.
 *
 * Returns OSP_SUCCESS / DCMF_SUCCESS on success, otherwise a non-zero
 * status.
 */
int OSPDI_GlobalAllreduce(int count,
                         OSP_reduce_op_t osp_op,
                         OSP_datatype_t osp_type,
                         void *in,
                         void *out)
{
    int status = OSP_SUCCESS;
    DCMF_CollectiveRequest_t ar_crequest;
    DCMF_Callback_t done_callback;
    DCMF_Op reduce_op;
    DCMF_Dt datatype;
    int bytes = 0;
    void *in_abs = NULL;
    volatile unsigned ga_active = 0;

    OSPU_FUNC_ENTER();

    switch (osp_op)
    {
        case OSP_SUM:
            reduce_op = DCMF_SUM;
            break;
        case OSP_PROD:
            reduce_op = DCMF_PROD;
            break;
        case OSP_MAX:
        case OSP_MAXABS:
            reduce_op = DCMF_MAX;
            break;
        case OSP_MIN:
        case OSP_MINABS:
            reduce_op = DCMF_MIN;
            break;
        case OSP_OR:
            reduce_op = DCMF_LOR;
            break;
        default:
            /* Generic non-zero failure code, as produced by the
             * `status = (ptr == NULL)` idiom in the other routines. */
            status = 1;
            OSPU_ERR_POP(1, "Unsupported OSP_reduce_op \n");
            goto fn_fail;
    }

    if (osp_op == OSP_MAXABS || osp_op == OSP_MINABS)
    {
        /* Signed and floating inputs are reduced on a copy holding their
         * absolute values. */
        switch (osp_type)
        {
        case OSP_DOUBLE:
            datatype = DCMF_DOUBLE;
            bytes = count * sizeof(double);
            status = OSPDI_Malloc(&in_abs, bytes);
            OSPU_ERR_POP(status != OSP_SUCCESS,
                        "OSPDI_Malloc returned error in OSPDI_GlobalAllreduce \n");
            OSPDI_ABS(double, in, in_abs, count);
            in = in_abs;
            break;
        case OSP_INT32:
            datatype = DCMF_SIGNED_INT;
            bytes = count * sizeof(int32_t);
            status = OSPDI_Malloc(&in_abs, bytes);
            OSPU_ERR_POP(status != OSP_SUCCESS,
                        "OSPDI_Malloc returned error in OSPDI_GlobalAllreduce \n");
            OSPDI_ABS(int32_t, in, in_abs, count);
            in = in_abs;
            break;
        case OSP_INT64:
            datatype = DCMF_SIGNED_LONG_LONG;
            bytes = count * sizeof(int64_t);
            status = OSPDI_Malloc(&in_abs, bytes);
            OSPU_ERR_POP(status != OSP_SUCCESS,
                        "OSPDI_Malloc returned error in OSPDI_GlobalAllreduce \n");
            OSPDI_ABS(int64_t, in, in_abs, count);
            in = in_abs;
            break;
        case OSP_UINT32:
            /* Unsigned values need no absolute-value pass. */
            datatype = DCMF_UNSIGNED_INT;
            break;
        case OSP_UINT64:
            datatype = DCMF_UNSIGNED_LONG_LONG;
            break;
        case OSP_FLOAT:
            datatype = DCMF_FLOAT;
            bytes = count * sizeof(float);
            status = OSPDI_Malloc(&in_abs, bytes);
            OSPU_ERR_POP(status != OSP_SUCCESS,
                        "OSPDI_Malloc returned error in OSPDI_GlobalAllreduce \n");
            OSPDI_ABS(float, in, in_abs, count);
            in = in_abs;
            break;
        default:
            status = 1;
            OSPU_ERR_POP(1, "Unsupported OSP_datatype \n");
            goto fn_fail;
        }
    }
    else
    {
        switch (osp_type)
        {
        case OSP_DOUBLE:
            datatype = DCMF_DOUBLE;
            break;
        case OSP_INT32:
            datatype = DCMF_SIGNED_INT;
            break;
        case OSP_INT64:
            datatype = DCMF_SIGNED_LONG_LONG;
            break;
        case OSP_UINT32:
            datatype = DCMF_UNSIGNED_INT;
            break;
        case OSP_UINT64:
            datatype = DCMF_UNSIGNED_LONG_LONG;
            break;
        case OSP_FLOAT:
            datatype = DCMF_FLOAT;
            break;
        default:
            /* Reported the same way as the other unsupported-argument
             * cases in this function. */
            status = 1;
            OSPU_ERR_POP(1, "Unsupported OSP_datatype \n");
            goto fn_fail;
        }
    }

    ga_active += 1;
    done_callback.function = OSPDI_Generic_done;
    done_callback.clientdata = (void *) &ga_active;

    status = DCMF_Allreduce(&OSPD_GlobalAllreduce_protocol,
                            &ar_crequest,
                            done_callback,
                            DCMF_SEQUENTIAL_CONSISTENCY,
                            &geometry,
                            (char *) in,
                            (char *) out,
                            count,
                            datatype,
                            reduce_op);
    /* Do not wait on ga_active when the operation was rejected. */
    OSPU_ERR_POP(status != DCMF_SUCCESS,
                "DCMF_Allreduce returned with error %d \n", status);

    OSPDI_Conditional_advance(ga_active > 0);

    fn_exit:
    if (in_abs != NULL) 
        OSPDI_Free(in_abs);
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;

}
Пример #26
0
/**
 * Applies a local "modify" reduction to every source/target pointer pair
 * described by an array of I/O vectors, while holding the OSPD global lock.
 *
 * Only OSP_BXOR is handled, and only for the 32/64-bit signed and unsigned
 * integer datatypes. The per-pair work is delegated to the OSPUI_MOD_BXOR
 * macro (presumably target ^= source element-wise - confirm against the
 * macro definition), with the element count derived as
 * iov_ar[i].size / sizeof(element type).
 *
 * @param iov_ar   array of I/O vector descriptors; each entry supplies
 *                 ptr_ar_len pointer pairs of iov_ar[i].size bytes each
 * @param ar_len   number of entries in iov_ar
 * @param osp_op   reduction operation; anything but OSP_BXOR is rejected
 * @param osp_type element datatype; must be one of the four integer types
 * @param scaling  not read by this function
 *
 * @return OSP_SUCCESS on success; OSP_ERROR when the first pointer pair is
 *         reached with an unsupported operation or datatype. With no
 *         pointer pairs to process the arguments are never validated and
 *         OSP_SUCCESS is returned.
 */
int OSPU_ModV_memcpy(OSP_iov_t *iov_ar,
                    int ar_len,
                    OSP_reduce_op_t osp_op,
                    OSP_datatype_t osp_type,
                    void* scaling)
{
    int i, j, status = OSP_SUCCESS;

    OSPU_FUNC_ENTER();

    OSPD_Global_lock_acquire();

    for (i=0; i<ar_len; i++)
    {
        for(j=0; j<iov_ar[i].ptr_ar_len; j++) 
        {
            switch (osp_op)
            {
                case OSP_BXOR:
                    switch (osp_type)
                    {
                        case OSP_INT32:
                            OSPUI_MOD_BXOR(int32_t,
                                     iov_ar[i].source_ptr_ar[j],
                                     iov_ar[i].target_ptr_ar[j],
                                     (iov_ar[i].size)/sizeof(int32_t));
                            break;
                        case OSP_INT64:
                            OSPUI_MOD_BXOR(int64_t,
                                     iov_ar[i].source_ptr_ar[j],
                                     iov_ar[i].target_ptr_ar[j],
                                     (iov_ar[i].size)/sizeof(int64_t));
                            break;
                        case OSP_UINT32:
                            OSPUI_MOD_BXOR(uint32_t,
                                     iov_ar[i].source_ptr_ar[j],
                                     iov_ar[i].target_ptr_ar[j],
                                     (iov_ar[i].size)/sizeof(uint32_t));
                            break;
                        case OSP_UINT64:
                            OSPUI_MOD_BXOR(uint64_t,
                                     iov_ar[i].source_ptr_ar[j],
                                     iov_ar[i].target_ptr_ar[j],
                                     (iov_ar[i].size)/sizeof(uint64_t));
                            break;
                        default:
                            status = OSP_ERROR;
                            OSPU_ERR_POP((status != OSP_SUCCESS), "Invalid data type in OSPU_ModV_memcpy\n");
                            break;
                    }
                    break;
                default:
                    status = OSP_ERROR;
                    OSPU_ERR_POP((status != OSP_SUCCESS), "Invalid op type in OSPU_ModV_memcpy\n");
                    break;
            }

        }
    }

    fn_exit:
    /* The lock is taken unconditionally right after function entry, so it
     * is dropped here, on the common exit path. Releasing it before the
     * label would leave it held whenever an error jumps to fn_fail. */
    OSPD_Global_lock_release();
    OSPU_FUNC_EXIT();
    return status;

    fn_fail:
    goto fn_exit;
}
Пример #27
0
/**
 * Issues one DCMF_Send on the generic put-accumulate protocol for every
 * source/target pointer pair described by an array of I/O vectors.
 *
 * A single header carrying the datatype and the scaling factor is filled in
 * once; its target_ptr field is rewritten for each pair before the send.
 * Every send gets its own request object, which is tied to ospd_handle and
 * passed as client data to the OSPDI_Request_done completion callback.
 *
 * @param target      destination rank handed to DCMF_Send; also indexes
 *                    OSPD_Connection_send_active
 * @param iov_ar      array of I/O vector descriptors
 * @param ar_len      number of entries in iov_ar
 * @param osp_type    datatype of the data and of *scaling
 * @param scaling     pointer to a scaling factor of type osp_type
 * @param ospd_handle handle whose active counter is bumped once per send
 *
 * @return OSP_SUCCESS when every send was posted; OSP_ERROR for an
 *         unsupported datatype; 1 when no request could be obtained;
 *         otherwise the failing DCMF_Send status.
 */
int OSPDI_Direct_putaccv(int target,
                        OSP_iov_t *iov_ar,
                        int ar_len,
                        OSP_datatype_t osp_type,
                        void *scaling,
                        OSPD_Handle_t *ospd_handle)
{
    int status = OSP_SUCCESS;
    int vec_idx;
    int pair_idx;
    OSP_iov_t *vec;
    OSPD_Putacc_header_t header;
    OSPD_Request_t *ospd_request;
    DCMF_Callback_t done_callback;

    OSPU_FUNC_ENTER();

    /* Copy the scaling factor into the header slot matching the datatype. */
    header.datatype = osp_type;
    switch (osp_type)
    {
        case OSP_DOUBLE:
            header.scaling.double_value = *(double *) scaling;
            break;

        case OSP_FLOAT:
            header.scaling.float_value = *(float *) scaling;
            break;

        case OSP_INT32:
            header.scaling.int32_value = *(int32_t *) scaling;
            break;

        case OSP_UINT32:
            header.scaling.uint32_value = *(uint32_t *) scaling;
            break;

        case OSP_INT64:
            header.scaling.int64_value = *(int64_t *) scaling;
            break;

        case OSP_UINT64:
            header.scaling.uint64_value = *(uint64_t *) scaling;
            break;

        default:
            status = OSP_ERROR;
            OSPU_ERR_POP((status != OSP_SUCCESS),"Invalid data type in putacc \n");
            break;
    }

    /* The completion routine is the same for every send; only the client
     * data (the per-send request) changes inside the loop. */
    done_callback.function = OSPDI_Request_done;

    for (vec_idx = 0; vec_idx < ar_len; vec_idx++)
    {
        vec = &iov_ar[vec_idx];

        for (pair_idx = 0; pair_idx < vec->ptr_ar_len; pair_idx++)
        {
            /* status doubles as the failure flag: it becomes 1 when no
             * request is available and 0 otherwise. */
            ospd_request = OSPDI_Get_request(1);
            OSPU_ERR_POP(status = (ospd_request == NULL),
                 "OSPDI_Get_request returned error.\n");
            OSPDI_Set_handle(ospd_request, ospd_handle);

            done_callback.clientdata = (void *) ospd_request;

            header.target_ptr = vec->target_ptr_ar[pair_idx];

            /* NOTE(review): the counter is raised before the send is
             * posted and is not lowered if DCMF_Send fails - confirm that
             * callers cope with that. */
            ospd_handle->active += 1;

            status = DCMF_Send(&OSPD_Generic_putacc_protocol,
                               &(ospd_request->request),
                               done_callback,
                               DCMF_SEQUENTIAL_CONSISTENCY,
                               target,
                               vec->size,
                               vec->source_ptr_ar[pair_idx],
                               (DCQuad *) &header,
                               (unsigned) 2);
            OSPU_ERR_POP((status != DCMF_SUCCESS), "Putacc returned with an error \n");

            OSPD_Connection_send_active[target] += 1;
        }
    }

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}