예제 #1
0
/**
 * \brief Finalize the DCMF messager.
 *
 * Calls DCMF_Messager_finalize() between DCMF_CriticalSection_enter(0) and
 * DCMF_CriticalSection_exit(0).
 *
 * \return Always 0.
 */
int ARMCIX_Finalize ()
{
  const int status = 0;

  DCMF_CriticalSection_enter (0);
  DCMF_Messager_finalize ();
  DCMF_CriticalSection_exit (0);

  return status;
}
예제 #2
0
/**
 * \brief ARMCI Extension non-blocking put operation.
 *
 * Marks the handle's completion record as active, bumps the per-connection
 * and global active counters, allocates a request object and issues one
 * DCMF_Put() covering the whole buffer. The function returns without waiting
 * for the transfer to complete.
 *
 * \param[in] src       Source buffer on the local node
 * \param[in] dst       Destination buffer on the remote node
 * \param[in] bytes     Number of bytes to transfer
 * \param[in] proc      Remote node rank
 * \param[in] nb_handle ARMCI non-blocking handle
 *
 * \return 0 if DCMF_Put() returned DCMF_SUCCESS, 1 otherwise
 */
int ARMCIX_NbPut (void * src, void * dst, int bytes, int proc, armci_ihdl_t nb_handle)
{
  DCMF_CriticalSection_enter (0);

  /* The handle's cmpl_info storage is used as the DCMF bookkeeping record. */
  armcix_dcmf_opaque_t * state = (armcix_dcmf_opaque_t *) &nb_handle->cmpl_info;
  state->active     = 1;
  state->connection = &__connection[proc];

  /* One more outstanding operation for this peer and overall. */
  __connection[proc].active  += 1;
  __global_connection.active += 1;

  /* cb_free is handed to the request allocator together with the handle;
   * presumably it runs when the request is released -- confirm against
   * ARMCIX_DCMF_request_allocate(). */
  DCMF_Callback_t cb_free;
  cb_free.function   = ARMCIX_DCMF_NbOp_cb_done;
  cb_free.clientdata = nb_handle;
  ARMCIX_DCMF_Request_t * req = ARMCIX_DCMF_request_allocate (cb_free);

  /* Completion callback of the put: hands the request to request_free. */
  DCMF_Callback_t cb_done;
  cb_done.function   = (void(*)(void *)) ARMCIX_DCMF_request_free;
  cb_done.clientdata = req;

  DCMF_Memregion_t * local_region  = &__connection[proc].local_mem_region;
  DCMF_Memregion_t * remote_region = &__connection[proc].remote_mem_region;

  DCMF_Result rc = DCMF_Put (&__put_protocol,
                             &(req->request),
                             cb_done,
                             DCMF_SEQUENTIAL_CONSISTENCY,
                             proc,
                             bytes,
                             local_region,
                             remote_region,
                             armcix_dcmf_va_to_offset (local_region, src),
                             armcix_dcmf_va_to_offset (remote_region, dst));

  DCMF_CriticalSection_exit (0);

  return (rc == DCMF_SUCCESS) ? 0 : 1;
}
예제 #3
0
/**
 * \brief ARMCI Extension blocking put operation.
 *
 * \param[in] src       Source buffer on the local node
 * \param[in] dst       Destination buffer on the remote node
 * \param[in] bytes     Number of bytes to transfer
 * \param[in] proc      Remote node rank
 *
 * \return ???
 */
int ARMCIX_Put( void * src, void * dst, int bytes, int proc)
{
  DCMF_CriticalSection_enter (0);

  volatile unsigned active = 1;
  DCMF_Callback_t cb_wait = { ARMCIX_DCMF_cb_decrement, (void *)&active };
  DCMF_Request_t request;

  DCMF_Memregion_t * src_memregion = &__connection[proc].local_mem_region;
  DCMF_Memregion_t * dst_memregion = &__connection[proc].remote_mem_region;

  DCMF_Result result =
    DCMF_Put (&__put_protocol,
              &request,
              cb_wait,
              DCMF_SEQUENTIAL_CONSISTENCY,
              proc,
              bytes,
              src_memregion,
              dst_memregion,
              armcix_dcmf_va_to_offset (src_memregion, src),
              armcix_dcmf_va_to_offset (dst_memregion, dst));

#ifdef BLOCKING_OPERATIONS_REQUIRE_FENCE
  ARMCIX_Fence (proc);
#else
  while (active) DCMF_Messager_advance ();
#endif

  DCMF_CriticalSection_exit  (0);

  return (result != DCMF_SUCCESS);
}
예제 #4
0
/**
 * \brief ARMCI Extension non-blocking strided put operation.
 *
 * Computes the expected number of requests as the product
 * seg_count[1] * ... * seg_count[stride_levels], records it in the handle's
 * completion record and in the per-connection and global active counters,
 * then delegates the transfers to ARMCIX_DCMF_PutS_recurse(). The count
 * returned by the recursion is asserted to equal the expected number.
 *
 * \param[in] src_ptr        pointer to 1st segment at source
 * \param[in] src_stride_arr array of strides at source
 * \param[in] dst_ptr        pointer to 1st segment at destination
 * \param[in] dst_stride_arr array of strides at destination
 * \param[in] seg_count      number of segments at each stride levels: count[0]=bytes
 * \param[in] stride_levels  number of stride levels
 * \param[in] proc           remote process(or) ID
 * \param[in] nb_handle      ARMCI non-blocking handle
 *
 * \return Always 0
 */
int ARMCIX_NbPutS (void * src_ptr, int * src_stride_arr,
                   void * dst_ptr, int * dst_stride_arr,
                   int * seg_count, int stride_levels, int proc,
                   armci_ihdl_t nb_handle)
{
  DCMF_CriticalSection_enter (0);

  /* Expected request count: seg_count[0] is skipped, the remaining
   * stride_levels entries are multiplied together. */
  unsigned n = 1;
  unsigned level = 0;
  while (level < stride_levels)
  {
    n *= seg_count[level + 1];
    level++;
  }

  /* The handle's cmpl_info storage is used as the DCMF bookkeeping record. */
  armcix_dcmf_opaque_t * state = (armcix_dcmf_opaque_t *) &nb_handle->cmpl_info;
  state->connection = &__connection[proc];
  state->active     = n;

  __connection[proc].active  += n;
  __global_connection.active += n;

  unsigned count = ARMCIX_DCMF_PutS_recurse (src_ptr, src_stride_arr,
                                             dst_ptr, dst_stride_arr,
                                             seg_count, stride_levels, proc,
                                             nb_handle);

  /* The counters above were charged with n; the recursion must agree. */
  assert (n == count);

  DCMF_CriticalSection_exit (0);

  return 0;
}
예제 #5
0
/**
 * \brief Point-to-point fence operation.
 *
 * Blocks until all active messages between the local node and the remote
 * node have completed and acknowledged by the remote node.
 *
 * \param[in] proc       Rank of the remote node to fence
 *
 * \see ARMCIX_AllFence
 * \see ARMCIX_DCMF_ReceiveFenceRequest
 * \see ARMCIX_DCMF_ReceiveFenceAck
 */
void ARMCIX_Fence (int proc)
{
    DCMF_CriticalSection_enter (0);

    DCMF_Request_t request;
    volatile unsigned active = 1;
    DCQuad quad;
    DCMF_Callback_t * cb = (DCMF_Callback_t *) &quad;
    cb->function   = ARMCIX_DCMF_cb_decrement;
    cb->clientdata = (void *) &active;
    DCMF_Send ( &__fence_rts_protocol,
                &request,
    (DCMF_Callback_t) {
        NULL, NULL
    },
    DCMF_SEQUENTIAL_CONSISTENCY,
    proc,
    0,
    NULL,
    (DCQuad *) &quad,
    1);

    while (active) DCMF_Messager_advance ();

    DCMF_CriticalSection_exit (0);
}
예제 #6
0
/**
 * \brief DCMF ARMCI Extension blocking wait operation for all requests to all processes
 *
 * Advances the messager, inside the DCMF critical section, for as long as
 * the global connection's active count is non-zero. The count is not
 * modified here; it is expected to be decremented by completion callbacks
 * run from DCMF_Messager_advance().
 *
 * \todo define return values
 * \return 0
 *
 * \see ARMCIX_DCMF_Connection_t
 * \see __global_connection
 */
int ARMCIX_WaitAll ()
{
    DCMF_CriticalSection_enter (0);

    while (__global_connection.active != 0)
    {
        DCMF_Messager_advance ();
    }

    DCMF_CriticalSection_exit (0);

    return 0;
}
예제 #7
0
/**
 * \brief DCMF ARMCI Extension blocking wait operation for all requests to a specific process
 *
 * Advances the messager, inside the DCMF critical section, for as long as
 * the active count of the connection to \c proc is non-zero. The count is
 * not modified here; it is expected to be decremented by completion
 * callbacks run from DCMF_Messager_advance().
 *
 * \param[in] proc Remote process rank
 *
 * \todo define return values
 * \return 0
 *
 * \see ARMCIX_DCMF_Connection_t
 * \see __connection
 */
int ARMCIX_WaitProc (int proc)
{
    DCMF_CriticalSection_enter (0);

    while (__connection[proc].active != 0)
    {
        DCMF_Messager_advance ();
    }

    DCMF_CriticalSection_exit (0);

    return 0;
}
예제 #8
0
/**
 * \brief DCMF ARMCI Extension blocking wait operation for a specific request
 *
 * The armcix_opaque_t structure is an opaque object that contains an
 * armcix_dcmf_opaque_t structure, which is used to maintain DCMF
 * ARMCIX state information for an operation in progress.
 *
 * This function advances the messager, inside the DCMF critical section,
 * for as long as the operation's active count is non-zero. The count is not
 * modified here; it is expected to be decremented by the operation's
 * completion callback.
 *
 * \param[in] cmpl_info Pointer to the ARMCIX opaque object
 *
 * \todo define return values
 * \return 0
 *
 * \see armcix_dcmf_opaque_t
 */
int ARMCIX_Wait (armcix_opaque_t * cmpl_info)
{
    DCMF_CriticalSection_enter (0);

    /* Reinterpret the opaque storage as the DCMF bookkeeping record. */
    armcix_dcmf_opaque_t * state = (armcix_dcmf_opaque_t *) cmpl_info;
    while (state->active != 0)
    {
        DCMF_Messager_advance ();
    }

    DCMF_CriticalSection_exit (0);

    return 0;
}
예제 #9
0
/**
 * \brief Register the DCMF ARMCI Extension put operation.
 *
 * Registers \c __put_protocol with a configuration that selects
 * DCMF_DEFAULT_PUT_PROTOCOL.
 *
 * \param[in]  connection_array Connection array (not used by this function)
 *
 * \see DCMF_Put_register
 */
void ARMCIX_DCMF_Put_register (ARMCIX_DCMF_Connection_t * connection_array)
{
  DCMF_CriticalSection_enter (0);

  DCMF_Put_Configuration_t config = {
    DCMF_DEFAULT_PUT_PROTOCOL
  };

  DCMF_Put_register (&__put_protocol, &config);

  DCMF_CriticalSection_exit (0);
}
예제 #10
0
/**
 * \brief ARMCI Extension non-blocking vector get operation.
 *
 * Issues one DCMF_Get() per (source pointer, destination pointer) pair of
 * every descriptor. Before issuing, the total number of transfers is
 * recorded in the handle's completion record and added to the
 * per-connection and global active counters. The function returns without
 * waiting for the transfers to complete.
 *
 * \param[in] darr      Descriptor array
 * \param[in] len       Length of descriptor array
 * \param[in] proc      Remote process(or) ID
 * \param[in] nb_handle ARMCI non-blocking handle
 *
 * \return 0 if every DCMF_Get() returned DCMF_SUCCESS; 1 if any of them
 *         failed, or if no transfer was issued at all (empty descriptor
 *         list)
 */
int ARMCIX_NbGetV (armci_giov_t * darr, int len, int proc, armci_ihdl_t nb_handle)
{
  /* Stays DCMF_ERROR when no transfer is issued, which preserves the
   * historical non-zero return for an empty descriptor list. */
  DCMF_Result result = DCMF_ERROR;

  /* Sticky failure flag: previously only the result of the LAST get was
   * reported, so an earlier failure was masked by a later success. */
  int failed = 0;

  DCMF_CriticalSection_enter (0);

  // Calculate the number of requests: one per pointer pair
  unsigned n = 0;
  unsigned i, j;
  for (i = 0; i < len; i++)
    n += darr[i].ptr_array_len;

  /* The handle's cmpl_info storage is used as the DCMF bookkeeping record. */
  armcix_dcmf_opaque_t * dcmf = (armcix_dcmf_opaque_t *) &nb_handle->cmpl_info;
  dcmf->connection = &__connection[proc];
  dcmf->active = n;

  __connection[proc].active += n;
  __global_connection.active += n;

  /* For a get, the source is the remote region and the destination is local. */
  DCMF_Memregion_t * src_memregion = &__connection[proc].remote_mem_region;
  DCMF_Memregion_t * dst_memregion = &__connection[proc].local_mem_region;

  DCMF_Callback_t cb_free = { ARMCIX_DCMF_NbOp_cb_done, nb_handle };
  DCMF_Callback_t cb_done = { (void(*)(void *)) ARMCIX_DCMF_request_free, NULL };
  for (i = 0; i < len; i++)
  {
    for (j = 0; j < darr[i].ptr_array_len; j++)
    {
      /* Each transfer gets its own request object; the completion callback
       * hands that request to ARMCIX_DCMF_request_free. */
      ARMCIX_DCMF_Request_t * new_request = ARMCIX_DCMF_request_allocate (cb_free);
      cb_done.clientdata = new_request;

      result =
        DCMF_Get (&__get_protocol,
                  &(new_request->request),
                  cb_done,
                  DCMF_SEQUENTIAL_CONSISTENCY,
                  proc,
                  darr[i].bytes,
                  src_memregion,
                  dst_memregion,
                  armcix_dcmf_va_to_offset (src_memregion, darr[i].src_ptr_array[j]),
                  armcix_dcmf_va_to_offset (dst_memregion, darr[i].dst_ptr_array[j]));

      /* NOTE(review): a failed get leaves the active counters charged and
       * its request allocated; whether that needs unwinding depends on the
       * DCMF callback contract -- confirm. */
      if (result != DCMF_SUCCESS)
        failed = 1;
    }
  }

  DCMF_CriticalSection_exit  (0);

  return (failed || result != DCMF_SUCCESS);
}
예제 #11
0
/**
 * \brief Helper-thread body that keeps advancing the DCMF messager.
 *
 * Enters the DCMF critical section and, while \c armcix_advance_active is
 * non-zero, alternates DCMF_Messager_advance() with
 * DCMF_CriticalSection_cycle(0). The critical section is exited once the
 * flag reads zero.
 *
 * \param[in] dummy Unused thread argument
 *
 * \return Always NULL
 */
void *armcix_advance(void * dummy)
{
    (void) dummy;

    DCMF_CriticalSection_enter (0);
    while (armcix_advance_active)
    {
        DCMF_Messager_advance (0);
        DCMF_CriticalSection_cycle (0);
    }
    DCMF_CriticalSection_exit(0);

    /* The function is declared to return void *; falling off the end would
     * leave the thread's exit value indeterminate for anyone joining it. */
    return NULL;
}
예제 #12
0
/**
 * \brief Stop the advance loop and finalize the DCMF messager.
 *
 * Inside the DCMF critical section, clears \c armcix_advance_active (the
 * flag polled by the armcix_advance loop) and then calls
 * DCMF_Messager_finalize(). This function does not join any thread.
 *
 * \return Always 0.
 */
int ARMCIX_Finalize ()
{
  const int status = 0;

  DCMF_CriticalSection_enter (0);

  /* Ask the advance loop to stop calling DCMF_Messager_advance(). */
  armcix_advance_active = 0;
  DCMF_Messager_finalize ();

  DCMF_CriticalSection_exit (0);

  return status;
}
예제 #13
0
/**
 * \brief ARMCI Extension blocking read-modify-write operation.
 *
 * Packs the operation into an ARMCIX_DCMF_RMWRequest_t, sends it as two
 * quads of message info in a zero-byte DCMF_Send() on the RMW request
 * protocol, and advances the messager until the local \c active flag is
 * zero. A pointer to that flag travels inside the request; nothing in this
 * function clears it, so presumably the RMW response handler does --
 * confirm against ARMCIX_DCMF_ReceiveRMWResponse.
 *
 * \param[in] op    Operation code; fetch-and-add and swap variants are
 *                  accepted, anything else goes to armci_die()
 * \param[in] ploc  Stored in the request; for swap operations \c *ploc is
 *                  also read as the value to send
 * \param[in] prem  Stored in the request
 * \param[in] extra Value sent for the fetch-and-add operations
 * \param[in] proc  Rank the request is sent to
 *
 * \return Always 0
 */
int ARMCIX_Rmw (int op, int * ploc, int * prem, int extra, int proc)
{
  DCMF_CriticalSection_enter (0);

  volatile unsigned active = 1;

  /* Initialize the RMW request data                                        */
  ARMCIX_DCMF_RMWRequest_t info;
  info.op   = op;
  info.ploc = ploc;
  info.prem = prem;

  if (op == ARMCI_FETCH_AND_ADD || op == ARMCI_FETCH_AND_ADD_LONG)
  {
    info.extra = extra;
  }
  else if (op == ARMCI_SWAP || op == ARMCI_SWAP_LONG)
  {
    info.extra = *ploc;
  }
  else
  {
    armci_die("rmw: operation not supported",op);
  }

  info.active = (unsigned *)&active;

  /* The send itself has no local completion callback. */
  DCMF_Request_t request;
  DCMF_Callback_t cb_null = { NULL, NULL };

  DCMF_Send (&__rmw_request_protocol,
             &request,
             cb_null,
             DCMF_SEQUENTIAL_CONSISTENCY,
             proc,
             0,
             NULL,
             (DCQuad *)&info,
             2);

  while (active != 0)
  {
    DCMF_Messager_advance ();
  }

  DCMF_CriticalSection_exit  (0);

  return 0;
}
예제 #14
0
/**
 * \brief Initialize the DCMF ARMCI resources
 *
 * Inside the DCMF critical section this, in order: initializes the
 * messager, initializes the connections, sets up the request pool,
 * registers the get/put/accumulate/fence/rmw protocols and configures the
 * messager's interrupt mode.
 *
 * Environment variables read: ARMCIX_DCMF_REQUESTPOOL_MAX (default 1000),
 * ARMCIX_DCMF_REQUESTPOOL_INC (default 0), DCMF_INTERRUPT and
 * DCMF_INTERRUPTS (default 1). The values are passed through ENV_Int() /
 * ENV_Bool(), which presumably override the default only when the variable
 * is set -- confirm.
 *
 * \return Always 0
 */
int ARMCIX_Init ()
{
  DCMF_CriticalSection_enter(0);

  DCMF_Messager_initialize ();
  ARMCIX_DCMF_Connection_initialize ();

  /* Determine request pool defaults */
  int pool_max = 1000;
  ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_MAX"), &pool_max);
  int pool_inc = 0;
  ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_INC"), &pool_inc);
  ARMCIX_DCMF_request_initialize (pool_max, pool_inc);

  /* Protocol registration; the order is kept exactly as is. */
  ARMCIX_DCMF_Get_register ();
  ARMCIX_DCMF_Put_register (__connection);
  ARMCIX_DCMF_Acc_register (__connection);
  ARMCIX_DCMF_Fence_register (__connection);
  ARMCIX_DCMF_Rmw_register ();

  /* Determine interrupt mode */
  int interrupts = 1;
  ENV_Bool (getenv ("DCMF_INTERRUPT"),  &interrupts);
  ENV_Bool (getenv ("DCMF_INTERRUPTS"), &interrupts);

  DCMF_Configure_t config;
  memset (&config, 0x00, sizeof(DCMF_Configure_t));
  if (interrupts == 0)
  {
    config.interrupts = DCMF_INTERRUPTS_OFF;
  }
  else
  {
    config.interrupts = DCMF_INTERRUPTS_ON;
  }

  /* First call passes the structure as both arguments, second call passes
   * NULL as the first argument. */
  DCMF_Messager_configure (&config, &config);
  DCMF_Messager_configure (NULL, &config);

  DCMF_CriticalSection_exit(0);

  return 0;
}
예제 #15
0
파일: a1d_core.c 프로젝트: jeffhammond/a1
/**
 * \brief Tear down the A1D device layer.
 *
 * Prints statistics, frees the flush lists (when compiled in), synchronizes
 * all ranks with a barrier, destroys every memregion in
 * \c A1D_Memregion_list, frees the memregion and base-pointer lists and
 * finally frees the private communicator. Every MPI and DCMF status is
 * checked with assert().
 *
 * \return Always 0
 */
int A1D_Finalize()
{
    int mpi_status;
    int rank;
    DCMF_Result dcmf_result;

    A1D_Print_stats();

#ifdef FLUSH_IMPLEMENTED
    /* release the Put flush list */
    free(A1D_Put_flush_list);

  #ifdef ACCUMULATE_IMPLEMENTED
    /* release the Acc flush list */
    free(A1D_Send_flush_list);
  #endif

#endif

    /* barrier so that no one is able to access remote memregions after they are destroyed */
    mpi_status = MPI_Barrier(A1D_COMM_WORLD);
    assert(mpi_status==0);

    /* destroy all memregions - not absolutely necessary if memregion creation has no side effects */
    DCMF_CriticalSection_enter(0);
    rank = 0;
    while (rank < mpi_size)
    {
        dcmf_result = DCMF_Memregion_destroy(&A1D_Memregion_list[rank]);
        assert(dcmf_result==DCMF_SUCCESS);
        rank++;
    }
    DCMF_CriticalSection_exit(0);

    /* the two per-rank tables are no longer needed */
    free(A1D_Memregion_list);
    free(A1D_Baseptr_list);

    mpi_status = MPI_Comm_free(&A1D_COMM_WORLD);
    assert(mpi_status==0);

    return 0;
}
예제 #16
0
/**
 * \brief Register the DCMF ARMCI Extension fence operation.
 *
 * Registers two protocols, in this order:
 *  - \c __fence_rts_protocol, a send protocol whose first receive handler
 *    is ARMCIX_DCMF_ReceiveFenceRequest;
 *  - \c __fence_ack_protocol, a control protocol whose receive handler is
 *    ARMCIX_DCMF_ReceiveFenceAck.
 * Both handlers receive \c connection_array as their client data.
 *
 * \param[in]  connection_array Connection array
 *
 * \see DCMF_Send_register
 * \see DCMF_Control_register
 */
void ARMCIX_DCMF_Fence_register (ARMCIX_DCMF_Connection_t * connection_array)
{
    DCMF_CriticalSection_enter (0);

    /* Fence request: send protocol; the second handler/client-data pair is unset. */
    DCMF_Send_Configuration_t rts_config = {
        DCMF_DEFAULT_SEND_PROTOCOL,
        DCMF_DEFAULT_NETWORK,
        ARMCIX_DCMF_ReceiveFenceRequest, connection_array,
        NULL, NULL
    };
    DCMF_Send_register (&__fence_rts_protocol, &rts_config);

    /* Fence acknowledgement: control protocol. */
    DCMF_Control_Configuration_t ack_config = {
        DCMF_DEFAULT_CONTROL_PROTOCOL,
        DCMF_DEFAULT_NETWORK,
        ARMCIX_DCMF_ReceiveFenceAck, connection_array
    };
    DCMF_Control_register (&__fence_ack_protocol, &ack_config);

    DCMF_CriticalSection_exit (0);
}
예제 #17
0
/**
 * \brief Global fence operation.
 *
 * Blocks until all active messages between the local node and all remote
 * nodes have completed and acknowledged by the remote node.
 *
 * Implementation: for each rank in turn, a zero-byte DCMF_Send() on the
 * fence RTS protocol carries one quad holding a callback
 * { ARMCIX_DCMF_cb_decrement, &active }. The function waits for \c active to
 * return to zero before moving on to the next rank, so the fences are
 * processed one peer at a time.
 *
 * \see ARMCIX_Fence
 * \see ARMCIX_DCMF_ReceiveFenceRequest
 * \see ARMCIX_DCMF_ReceiveFenceAck
 */
void ARMCIX_AllFence ()
{
    DCMF_CriticalSection_enter (0);

    const unsigned nranks = DCMF_Messager_size ();

    volatile unsigned active = 0;

    /* The quad's storage is written through a callback-typed pointer. */
    DCQuad quad;
    DCMF_Callback_t * info_cb = (DCMF_Callback_t *) &quad;
    info_cb->function   = ARMCIX_DCMF_cb_decrement;
    info_cb->clientdata = (void *) &active;

    /* cb_null goes to the request allocator; cb_done is the send's local
     * completion callback and hands the request to request_free. */
    DCMF_Callback_t cb_null = { NULL, NULL };
    DCMF_Callback_t cb_done = { (void (*)(void *, DCMF_Error_t *))ARMCIX_DCMF_request_free, NULL };

    unsigned rank = 0;
    while (rank < nranks)
    {
        ARMCIX_DCMF_Request_t * req = ARMCIX_DCMF_request_allocate (cb_null);
        cb_done.clientdata = req;

        active++;
        DCMF_Send (&__fence_rts_protocol,
                   &(req->request),
                   cb_done,
                   DCMF_SEQUENTIAL_CONSISTENCY,
                   rank,
                   0,
                   NULL,
                   (DCQuad *) &quad,
                   1);

        while (active != 0)
        {
            DCMF_Messager_advance ();
        }

        rank++;
    }

    DCMF_CriticalSection_exit (0);
}
예제 #18
0
/**
 * \brief Register the DCMF ARMCI Extension rmw operation.
 *
 * Registers two protocols, in this order:
 *  - \c __rmw_request_protocol, a send protocol whose first receive handler
 *    is ARMCIX_DCMF_RecvRMWRequest;
 *  - \c __rmw_response_protocol, a control protocol whose receive handler is
 *    ARMCIX_DCMF_ReceiveRMWResponse.
 * No client data is attached to either handler.
 *
 * \see DCMF_Control_register
 * \see DCMF_Send_register
 */
void ARMCIX_DCMF_Rmw_register ()
{
  DCMF_CriticalSection_enter (0);

  /* RMW request: send protocol; the second handler/client-data pair is unset. */
  DCMF_Send_Configuration_t request_config = {
    DCMF_DEFAULT_SEND_PROTOCOL,
    DCMF_DEFAULT_NETWORK,
    ARMCIX_DCMF_RecvRMWRequest, NULL,
    NULL, NULL
  };
  DCMF_Send_register (&__rmw_request_protocol, &request_config);

  /* RMW response: control protocol. */
  DCMF_Control_Configuration_t response_config = {
    DCMF_DEFAULT_CONTROL_PROTOCOL,
    DCMF_DEFAULT_NETWORK,
    ARMCIX_DCMF_ReceiveRMWResponse, NULL
  };
  DCMF_Control_register (&__rmw_response_protocol, &response_config);

  DCMF_CriticalSection_exit (0);
}
예제 #19
0
파일: a1d_core.c 프로젝트: jeffhammond/a1
/**
 * \brief Bring up the A1D device layer on top of MPI and DCMF.
 *
 * Steps, in order:
 *  1. Check that MPI is initialized with MPI_THREAD_MULTIPLE, duplicate
 *     MPI_COMM_WORLD into A1D_COMM_WORLD, and check that MPI and DCMF agree
 *     on rank and size.
 *  2. Reconfigure the DCMF messager (between two barriers).
 *  3. Create a local memregion, allgather one memregion per rank into
 *     A1D_Memregion_list, destroy the temporary local memregion, and query
 *     every gathered memregion to fill A1D_Baseptr_list.
 *  4. Allocate the flush lists when compiled in.
 *  5. Set up the null callback A1D_Nocallback.
 *
 * Every MPI/DCMF status and every allocation is checked with assert().
 *
 * \return Always 0
 */
int A1D_Initialize()
{
    int mpi_initialized, mpi_provided;
    int mpi_status;
    int i;
    size_t bytes_in, bytes_out;
    DCMF_Result dcmf_result;
    DCMF_Configure_t dcmf_config;
    DCMF_Memregion_t local_memregion;

    /***************************************************
     *
     * configure MPI
     *
     ***************************************************/

    /* MPI has to be initialized for this implementation to work */
    MPI_Initialized(&mpi_initialized);
    assert(mpi_initialized==1);

    /* MPI has to be thread-safe so that DCMF doesn't explode */
    MPI_Query_thread(&mpi_provided);
    assert(mpi_provided==MPI_THREAD_MULTIPLE);

    /* have to use our own communicator for collectives to be proper */
    mpi_status = MPI_Comm_dup(MPI_COMM_WORLD,&A1D_COMM_WORLD);
    assert(mpi_status==0);

    /* get my MPI rank */
    mpi_status = MPI_Comm_rank(A1D_COMM_WORLD,&myrank);
    assert(mpi_status==0);

    /* get MPI world size */
    mpi_status = MPI_Comm_size(A1D_COMM_WORLD,&mpi_size);
    assert(mpi_status==0);

    /* make sure MPI and DCMF agree */
    assert(myrank==DCMF_Messager_rank());
    assert(mpi_size==DCMF_Messager_size());

    /* barrier before DCMF_Messager_configure to make sure MPI is ready everywhere */
    mpi_status = MPI_Barrier(A1D_COMM_WORLD);
    assert(mpi_status==0);

    /***************************************************
     *
     * configure DCMF
     *
     ***************************************************/

    /* NOTE(review): dcmf_config is not zeroed; only thread_level (and,
     * with ACCUMULATE_IMPLEMENTED, interrupts) is assigned before the
     * structure is passed to DCMF_Messager_configure -- confirm that the
     * remaining fields are not read. */

    /* to be safe, but perhaps not necessary */
    dcmf_config.thread_level = DCMF_THREAD_MULTIPLE;
#ifdef ACCUMULATE_IMPLEMENTED
    /* interrupts required for accumulate only, Put/Get use DMA
     * if accumulate not used, MPI will query environment for DCMF_INTERRUPTS */
    dcmf_config.interrupts = DCMF_INTERRUPTS_ON;
#endif

    /* reconfigure DCMF (interrupts are forced on only with ACCUMULATE_IMPLEMENTED) */
    DCMF_CriticalSection_enter(0);
    dcmf_result = DCMF_Messager_configure(&dcmf_config, &dcmf_config);
    assert(dcmf_result==DCMF_SUCCESS);
    DCMF_CriticalSection_exit(0);

    /* barrier after DCMF_Messager_configure to make sure everyone has the new DCMF config */
    mpi_status = MPI_Barrier(A1D_COMM_WORLD);
    assert(mpi_status==0);

    /***************************************************
     *
     * setup DCMF memregions
     *
     ***************************************************/

    /* allocate memregion list */
    A1D_Memregion_list = malloc( mpi_size * sizeof(DCMF_Memregion_t) );
    assert(A1D_Memregion_list != NULL);

    /* allocate base pointer list; check THIS allocation (the memregion
     * list was mistakenly re-checked here before) */
    A1D_Baseptr_list = malloc( mpi_size * sizeof(void*) );
    assert(A1D_Baseptr_list != NULL);

    /* create memregions; bytes_in = -1 converts to the largest size_t value */
    bytes_in = -1;
    DCMF_CriticalSection_enter(0);
    dcmf_result = DCMF_Memregion_create(&local_memregion,&bytes_out,bytes_in,NULL,0);
    assert(dcmf_result==DCMF_SUCCESS);
    DCMF_CriticalSection_exit(0);

    /* exchange memregions because we don't use symmetry heap */
    mpi_status = MPI_Allgather(&local_memregion,sizeof(DCMF_Memregion_t),MPI_BYTE,
                               A1D_Memregion_list,sizeof(DCMF_Memregion_t),MPI_BYTE,
                               A1D_COMM_WORLD);
    assert(mpi_status==0);

    /* destroy temporary local memregion */
    DCMF_CriticalSection_enter(0);
    dcmf_result = DCMF_Memregion_destroy(&local_memregion);
    assert(dcmf_result==DCMF_SUCCESS);
    DCMF_CriticalSection_exit(0);

    /* check for valid memregions and record each one's base pointer */
    DCMF_CriticalSection_enter(0);
    for (i = 0; i < mpi_size; i++)
    {
        dcmf_result = DCMF_Memregion_query(&A1D_Memregion_list[i],
                                           &bytes_out,
                                           &A1D_Baseptr_list[i]);
        assert(dcmf_result==DCMF_SUCCESS);
    }
    DCMF_CriticalSection_exit(0);

#ifdef FLUSH_IMPLEMENTED
    /***************************************************
     *
     * setup flush list(s)
     *
     ***************************************************/

    /* allocate Put list */
    A1D_Put_flush_list = malloc( mpi_size * sizeof(int) );
    assert(A1D_Put_flush_list != NULL);

  #ifdef ACCUMULATE_IMPLEMENTED
    /* allocate Acc list */
    A1D_Send_flush_list = malloc( mpi_size * sizeof(int) );
    assert(A1D_Send_flush_list != NULL);
  #endif

#endif

    /***************************************************
     *
     * define null callback
     *
     ***************************************************/

    A1D_Nocallback.function = NULL;
    A1D_Nocallback.clientdata = NULL;

    return(0);
}
예제 #20
0
/**
 * \brief Initialize the DCMF ARMCI resources
 *
 * Inside the DCMF critical section this, in order: initializes the
 * messager, initializes the connections, sets up the request pool,
 * registers the get/put/accumulate/fence/rmw protocols, starts the
 * armcix_advance helper thread when interrupts are disabled, and configures
 * the messager's interrupt mode.
 *
 * Environment variables read: ARMCIX_DCMF_REQUESTPOOL_MAX (default 1000),
 * ARMCIX_DCMF_REQUESTPOOL_INC (default 0), DCMF_INTERRUPT and
 * DCMF_INTERRUPTS (default 0). The values are passed through ENV_Int() /
 * ENV_Bool(), which presumably override the default only when the variable
 * is set -- confirm.
 *
 * \return Always 0
 */
int ARMCIX_Init ()
{
  DCMF_CriticalSection_enter(0);

  DCMF_Messager_initialize ();
  ARMCIX_DCMF_Connection_initialize ();

  /* Determine request pool defaults */
  int pool_max = 1000;
  ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_MAX"), &pool_max);
  int pool_inc = 0;
  ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_INC"), &pool_inc);
  ARMCIX_DCMF_request_initialize (pool_max, pool_inc);

  /* Protocol registration; the order is kept exactly as is. */
  ARMCIX_DCMF_Get_register ();
  ARMCIX_DCMF_Put_register (__connection);
  ARMCIX_DCMF_Acc_register (__connection);
  ARMCIX_DCMF_Fence_register (__connection);
  ARMCIX_DCMF_Rmw_register ();

  /* Initialize helper thread or configure interrupt mode */
  int interrupts = 0;
  ENV_Bool (getenv ("DCMF_INTERRUPT"),  &interrupts);
  ENV_Bool (getenv ("DCMF_INTERRUPTS"), &interrupts);

  if (interrupts == 0)
  {
    /* Without interrupts a helper thread runs armcix_advance. The thread
     * body enters the DCMF critical section before it reads
     * armcix_advance_active, and this function still holds that section
     * while the flag is being set below. */
    const int rc = pthread_create (&armcix_advance_thread, NULL, armcix_advance, NULL);
    const int is_rank0 = (0 == DCMF_Messager_rank ());

    if (rc == 0)
    {
      if (is_rank0) fprintf(stdout,"pthread_create succeeded\n");
      armcix_advance_active = 1;
    }
    else
    {
      if (is_rank0) fprintf(stdout,"pthread_create failed\n");
      armcix_advance_active = 0;
    }
  }

  DCMF_Configure_t config;
  memset (&config, 0x00, sizeof(DCMF_Configure_t));
  if (interrupts == 0)
  {
    config.interrupts = DCMF_INTERRUPTS_OFF;
  }
  else
  {
    config.interrupts = DCMF_INTERRUPTS_ON;
  }

  /* First call passes the structure as both arguments, second call passes
   * NULL as the first argument. */
  DCMF_Messager_configure (&config, &config);
  DCMF_Messager_configure (NULL, &config);

  DCMF_CriticalSection_exit(0);

  return 0;
}
예제 #21
0
/**
 * \brief Allocate the connection array and exchange memory regions.
 *
 * Inside the DCMF critical section this:
 *  - sets the global connection's peer to (unsigned) -1;
 *  - allocates a zeroed, 16-byte aligned array with one
 *    ARMCIX_DCMF_Connection_t per rank and stores it in \c __connection;
 *  - creates a local memregion for every entry;
 *  - registers a temporary send protocol and sends the local memregion of
 *    each peer entry to that peer, one peer at a time;
 *  - advances the messager until \c __memregions_to_receive is zero.
 *
 * The process is aborted if the connection array cannot be allocated.
 */
void ARMCIX_DCMF_Connection_initialize ()
{
  DCMF_CriticalSection_enter(0);

  __global_connection.peer = (unsigned) -1;

  unsigned rank = DCMF_Messager_rank ();
  unsigned size = DCMF_Messager_size ();

  /* The allocation result used to be ignored, so a failure led straight to
   * bzero() on an invalid pointer. Nothing below can work without the
   * array, so stop deterministically instead. */
  if (posix_memalign ((void **)&__connection, 16, sizeof(ARMCIX_DCMF_Connection_t) * size) != 0)
  {
    abort ();
  }
  bzero ((void *)__connection, sizeof(ARMCIX_DCMF_Connection_t) * size);

  /* Receives the byte count reported by DCMF_Memregion_create; not used. */
  size_t bytes = (size_t) -1;

  unsigned i;
  for (i = 0; i < size; i++)
  {
    __connection[i].peer = i;
#warning fix memregion setup to handle non-global address space pinning.
    /* The DCMF_Result of the create call is not checked. */
    DCMF_Memregion_create (&__connection[i].local_mem_region,
                           &bytes, (size_t) -1, NULL, 0);
  }

  // Register a send protocol to exchange memory regions
  DCMF_Protocol_t send_protocol;
  DCMF_Send_Configuration_t send_configuration = {
    DCMF_DEFAULT_SEND_PROTOCOL,
    DCMF_DEFAULT_NETWORK,
    ARMCIX_DCMF_RecvMemregion1,
    __connection,
    ARMCIX_DCMF_RecvMemregion2,
    __connection
  };
  DCMF_Send_register (&send_protocol, &send_configuration);

  /* One request object is reused: each send is waited for before the next. */
  DCMF_Request_t request;
  volatile unsigned active;
  DCMF_Callback_t cb_done = { ARMCIX_DCMF_cb_decrement, (void *) &active };

  // Exchange the memory regions
  __memregions_to_receive = size;
  for (i = 0; i < size; i++)
  {
    /* Start with the own rank and walk the ranks in increasing order. */
    unsigned peer = (rank+i)%size;
    active = 1;
    DCMF_Send (&send_protocol,
               &request,
               cb_done,
               DCMF_SEQUENTIAL_CONSISTENCY,
               peer,
               sizeof(DCMF_Memregion_t),
               (char *) &__connection[peer].local_mem_region,
               (DCQuad *) NULL,
               0);
    while (active) DCMF_Messager_advance();
  }

  /* Wait until the expected number of memregions has been received. */
  while (__memregions_to_receive) DCMF_Messager_advance();

  DCMF_CriticalSection_exit(0);
}
예제 #22
0
/**
 * Send-latency benchmark with the receiver in two different states.
 *
 * Rank 0 walks the message sizes 1, 2, 4, ... below MAX_MSG_SIZE_LOCAL. For
 * each size it posts ITERATIONS_LOCAL sends to rank (myrank + 1) % nranks,
 * advances the messager until done_count is no longer positive, and prints
 * the average time per send. This is done twice per size:
 *  - "Send-Remote Barrier": the other ranks are waiting in barrier();
 *  - "Send-Remote Sleep":   the other ranks sleep for 10 seconds while
 *    holding the DCMF critical section.
 *
 * All other ranks only run the barrier / sleep / barrier sequence that
 * matches rank 0's two barriers per message size.
 *
 * NOTE(review): done_count is reset to the literal 10000 rather than to
 * ITERATIONS_LOCAL; the wait loops are only correct if that matches what
 * the `done` callback counts -- confirm.
 * NOTE(review): msginfo is sent without being initialized -- confirm the
 * receiver ignores its contents.
 */
void send_remoteadvance()
{

    DCMF_Request_t *send_req;
    DCMF_Callback_t send_done;
    int done_count;
    unsigned int msgsize, i, dst;
    DCQuad msginfo;

    send_req = (DCMF_Request_t *) malloc(sizeof(DCMF_Request_t)
            * ITERATIONS_LOCAL);
    if (send_req == NULL)
    {
        /* The request array is indexed unconditionally below. */
        fprintf(stderr, "send_remoteadvance: request allocation failed\n");
        abort();
    }

    send_done.function = done;
    send_done.clientdata = (void *) &done_count;

    if (myrank == 0)
    {
        printf("Send latency in usec\n");
        fflush(stdout);
    }

    if (myrank == 0)
    {
        char buffer[100];
        sprintf(buffer,
                "%20s  %20s %20s",
                "Msg Size",
                "Send-Remote Barrier",
                "Send-Remote Sleep");
        printf("%s \n", buffer);
        fflush(stdout);
    }

    if (myrank == 0)
    {

        for (msgsize = 1; msgsize < MAX_MSG_SIZE_LOCAL; msgsize *= 2)
        {

            /***********************
             * start timer          *
             ***********************/

            t_start = DCMF_Timebase();

            done_count = 10000;

            for (i = 0; i < ITERATIONS_LOCAL; i++)
            {
                DCMF_Send(&snd_reg,
                          &send_req[i],
                          send_done,
                          DCMF_SEQUENTIAL_CONSISTENCY,
                          (myrank + 1) % nranks,
                          msgsize,
                          source,
                          &msginfo,
                          1);
            }

            while (done_count > 0)
                DCMF_Messager_advance();

            t_stop = DCMF_Timebase();
            t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS_LOCAL);

            /***********************
             * stop timer          *
             ***********************/

            if (myrank == 0)
            {
                /* msgsize is unsigned, so it is printed with %u */
                printf("%20u %20.2f ", msgsize, t_usec);
                fflush(stdout);
            }

            /* releases the peers into their sleep phase */
            barrier();

            /***********************
             * start timer          *
             ***********************/

            t_start = DCMF_Timebase();

            done_count = 10000;

            for (i = 0; i < ITERATIONS_LOCAL; i++)
            {
                DCMF_Send(&snd_reg,
                          &send_req[i],
                          send_done,
                          DCMF_SEQUENTIAL_CONSISTENCY,
                          (myrank + 1) % nranks,
                          msgsize,
                          source,
                          &msginfo,
                          1);
            }

            while (done_count > 0)
                DCMF_Messager_advance();

            t_stop = DCMF_Timebase();
            t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS_LOCAL);

            /***********************
             * stop timer          *
             ***********************/

            if (myrank == 0)
            {
                printf("%20.2f \n", t_usec);
                fflush(stdout);
            }

            barrier();

        }

    }
    else
    {

        for (msgsize = 1; msgsize < MAX_MSG_SIZE_LOCAL; msgsize *= 2)
        {

            /* rank 0 runs its first timed batch while we wait here */
            barrier();

            /* hold the critical section while sleeping during rank 0's
             * second timed batch */
            DCMF_CriticalSection_enter(0);

            sleep(10);

            DCMF_CriticalSection_exit(0);

            barrier();
        }

    }

    barrier();

    /* The request array was never released before. Both wait loops have
     * finished by now and every rank has passed the final barrier. */
    free(send_req);
}