Ejemplo n.º 1
0
/**
 * \brief Point-to-point fence operation.
 *
 * Sends a zero-byte fence request to \c proc on \c __fence_rts_protocol and
 * then spins on DCMF_Messager_advance() until a local counter drops to zero.
 * The single quad of message info carries a callback descriptor
 * (ARMCIX_DCMF_cb_decrement plus the address of that counter); the counter is
 * presumably decremented when the remote acknowledgement is processed -- see
 * the handlers referenced below.
 *
 * The intended contract is that the call blocks until all active messages
 * between the local node and the remote node have completed and have been
 * acknowledged by the remote node.
 *
 * The whole operation runs inside DCMF critical section 0.
 *
 * \param[in] proc       Rank of the remote node to fence
 *
 * \see ARMCIX_AllFence
 * \see ARMCIX_DCMF_ReceiveFenceRequest
 * \see ARMCIX_DCMF_ReceiveFenceAck
 */
void ARMCIX_Fence (int proc)
{
    DCMF_CriticalSection_enter (0);

    /* Outstanding-fence counter; the wait loop below ends when it is zero. */
    volatile unsigned pending = 1;

    /* The message info quad is overlaid with a callback descriptor that
     * names the decrement routine and the counter it should act on. */
    DCQuad msginfo;
    DCMF_Callback_t * ack = (DCMF_Callback_t *) &msginfo;
    ack->clientdata = (void *) &pending;
    ack->function   = ARMCIX_DCMF_cb_decrement;

    /* No local completion callback is requested for the send itself. */
    DCMF_Request_t fence_request;
    DCMF_Send (&__fence_rts_protocol,
               &fence_request,
               (DCMF_Callback_t) { NULL, NULL },
               DCMF_SEQUENTIAL_CONSISTENCY,
               proc,
               0,
               NULL,
               (DCQuad *) &msginfo,
               1);

    while (pending)
    {
        DCMF_Messager_advance ();
    }

    DCMF_CriticalSection_exit (0);
}
Ejemplo n.º 2
0
/**
 * \brief ARMCI Extension blocking read-modify-write operation.
 *
 * Packs the operation into an ARMCIX_DCMF_RMWRequest_t, sends it as two
 * quads of message info (no payload) on \c __rmw_request_protocol and spins
 * on DCMF_Messager_advance() until the local completion counter, whose
 * address travels in the request, has dropped to zero. The whole operation
 * runs inside DCMF critical section 0.
 *
 * \param[in] op     Operation selector. ARMCI_FETCH_AND_ADD and
 *                   ARMCI_FETCH_AND_ADD_LONG use \c extra as the operand;
 *                   ARMCI_SWAP and ARMCI_SWAP_LONG use the value read from
 *                   \c *ploc. Any other value is reported via armci_die().
 * \param[in] ploc   Local address, forwarded in the request; dereferenced
 *                   here only for the swap operations.
 * \param[in] prem   Remote address, forwarded in the request.
 * \param[in] extra  Operand for the fetch-and-add operations.
 * \param[in] proc   Rank the request is sent to.
 *
 * \retval 0 Always.
 */
int ARMCIX_Rmw (int op, int * ploc, int * prem, int extra, int proc)
{
  DCMF_CriticalSection_enter (0);

  /* Completion counter; the wait loop below ends when it is zero. */
  volatile unsigned pending = 1;

  /* Describe the requested operation. */
  ARMCIX_DCMF_RMWRequest_t rmw;
  rmw.op   = op;
  rmw.ploc = ploc;
  rmw.prem = prem;

  if (op == ARMCI_FETCH_AND_ADD || op == ARMCI_FETCH_AND_ADD_LONG)
  {
    rmw.extra = extra;
  }
  else if (op == ARMCI_SWAP || op == ARMCI_SWAP_LONG)
  {
    rmw.extra = *ploc;
  }
  else
  {
    armci_die("rmw: operation not supported",op);
  }

  rmw.active = (unsigned *)&pending;

  /* The request header is the whole message: zero payload bytes, 2 quads. */
  DCMF_Callback_t no_callback = { NULL, NULL };
  DCMF_Request_t rmw_request;

  DCMF_Send (&__rmw_request_protocol,
             &rmw_request,
             no_callback,
             DCMF_SEQUENTIAL_CONSISTENCY,
             proc,
             0,
             NULL,
             (DCQuad *)&rmw,
             2);

  while (pending)
  {
    DCMF_Messager_advance ();
  }

  DCMF_CriticalSection_exit  (0);

  return 0;
}
Ejemplo n.º 3
0
/**
 * \brief Global fence operation.
 *
 * Blocks until all active messages between the local node and all remote
 * nodes have completed and acknowledged by the remote node.
 *
 * \see ARMCIX_Fence
 * \see ARMCIX_DCMF_ReceiveFenceRequest
 * \see ARMCIX_DCMF_ReceiveFenceAck
 */
void ARMCIX_AllFence ()
{
    DCMF_CriticalSection_enter (0);

    unsigned size = DCMF_Messager_size ();
    unsigned peer;

    volatile unsigned active = 0;
    DCQuad quad;
    DCMF_Callback_t * cb = (DCMF_Callback_t *) &quad;
    cb->function   = ARMCIX_DCMF_cb_decrement;
    cb->clientdata = (void *) &active;

    DCMF_Callback_t cb_null = { NULL, NULL };
    DCMF_Callback_t cb_done = { (void (*)(void *, DCMF_Error_t *))ARMCIX_DCMF_request_free, NULL };
    for (peer = 0; peer < size; peer++)
    {
        ARMCIX_DCMF_Request_t * new_request = ARMCIX_DCMF_request_allocate (cb_null);
        cb_done.clientdata = new_request;

        active++;
        DCMF_Send ( &__fence_rts_protocol,
                    &(new_request->request),
                    cb_done,
                    DCMF_SEQUENTIAL_CONSISTENCY,
                    peer,
                    0,
                    NULL,
                    (DCQuad *) &quad,
                    1);

        while (active) DCMF_Messager_advance ();
    }

    DCMF_CriticalSection_exit (0);
}
Ejemplo n.º 4
0
/*
 * Vector put-accumulate.
 *
 * Issues one DCMF_Send on A1D_Generic_putacc_protocol for every
 * source/target pointer pair described by iov_ar. Each message carries
 * iov_ar[i].size payload bytes and a two-quad header holding the datatype,
 * the scaling factor and the target address.
 *
 * target      rank the messages are sent to
 * iov_ar      array of ar_len vector descriptors
 * ar_len      number of entries in iov_ar
 * a1_type     element type; selects how *scaling is read
 * scaling     pointer to the scaling factor, of the type named by a1_type
 * a1d_handle  handle whose active count is incremented once per message
 *
 * Returns A1_SUCCESS when every send was issued. Error conditions are
 * routed through A1U_ERR_POP (presumably a jump to fn_fail -- the macro is
 * not visible here) and the function then returns the status set at that
 * point.
 */
int A1DI_Direct_putaccv(int target,
                        A1_iov_t *iov_ar,
                        int ar_len,
                        A1_datatype_t a1_type,
                        void *scaling,
                        A1D_Handle_t *a1d_handle)
{
    int status = A1_SUCCESS;
    int vec_idx, ptr_idx;
    A1D_Putacc_header_t header;
    A1D_Request_t *a1d_request;
    DCMF_Callback_t done_callback;

    A1U_FUNC_ENTER();

    /* Copy the scaling factor into the header using the caller's type. */
    header.datatype = a1_type;
    switch (a1_type)
    {
        case A1_INT32:
            (header.scaling).int32_value = *((int32_t *) scaling);
            break;
        case A1_UINT32:
            (header.scaling).uint32_value = *((uint32_t *) scaling);
            break;
        case A1_INT64:
            (header.scaling).int64_value = *((int64_t *) scaling);
            break;
        case A1_UINT64:
            (header.scaling).uint64_value = *((uint64_t *) scaling);
            break;
        case A1_FLOAT:
            (header.scaling).float_value = *((float *) scaling);
            break;
        case A1_DOUBLE:
            (header.scaling).double_value = *((double *) scaling);
            break;
        default:
            status = A1_ERROR;
            A1U_ERR_POP((status != A1_SUCCESS),"Invalid data type in putacc \n");
            break;
    }

    /* The completion routine is the same for every message; only its
     * argument changes per request. */
    done_callback.function = A1DI_Request_done;

    for (vec_idx = 0; vec_idx < ar_len; vec_idx++)
    {
        A1_iov_t *vec = &iov_ar[vec_idx];

        for (ptr_idx = 0; ptr_idx < vec->ptr_ar_len; ptr_idx++)
        {
            a1d_request = A1DI_Get_request(1);
            A1U_ERR_POP(status = (a1d_request == NULL),
                 "A1DI_Get_request returned error.\n");
            A1DI_Set_handle(a1d_request, a1d_handle);

            done_callback.clientdata = (void *) a1d_request;

            a1d_handle->active++;

            header.target_ptr = vec->target_ptr_ar[ptr_idx];

            status = DCMF_Send(&A1D_Generic_putacc_protocol,
                               &(a1d_request->request).message_request, /* TODO verify */
                               done_callback,
                               DCMF_SEQUENTIAL_CONSISTENCY,
                               target,
                               vec->size,
                               vec->source_ptr_ar[ptr_idx],
                               (DCQuad *) &header,
                               (unsigned) 2);
            A1U_ERR_POP((status != DCMF_SUCCESS), "Putacc returned with an error \n");

            /* Per-target count of sends issued from here. */
            A1D_Connection_send_active[target]++;
        }
    }

  fn_exit:
    A1U_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Ejemplo n.º 5
0
/**
 * \brief Build the per-rank connection table and exchange memory regions.
 *
 * Steps, all performed inside DCMF critical section 0:
 *  - mark \c __global_connection.peer with the sentinel (unsigned) -1;
 *  - allocate a zeroed, 16-byte aligned array \c __connection with one
 *    ARMCIX_DCMF_Connection_t per rank and create a local memory region in
 *    every entry;
 *  - register a send protocol whose receive handlers are
 *    ARMCIX_DCMF_RecvMemregion1 / ARMCIX_DCMF_RecvMemregion2;
 *  - send \c __connection[peer].local_mem_region to every peer, starting
 *    with the local rank, waiting for each send to complete;
 *  - keep advancing the messager until \c __memregions_to_receive is zero
 *    (presumably decremented by the receive handlers, which are not visible
 *    here).
 *
 * The process is aborted if the connection table cannot be allocated: the
 * function has no way to report failure, and every later step dereferences
 * the table.
 */
void ARMCIX_DCMF_Connection_initialize ()
{
  DCMF_CriticalSection_enter(0);

  __global_connection.peer = (unsigned) -1;

  unsigned rank = DCMF_Messager_rank ();
  unsigned size = DCMF_Messager_size ();

  /* posix_memalign() reports failure through its return value and leaves
   * the output pointer unspecified in that case, so the table must not be
   * touched unless the call succeeded. */
  if (posix_memalign ((void **)&__connection, 16, sizeof(ARMCIX_DCMF_Connection_t) * size) != 0)
  {
    abort ();
  }
  bzero ((void *)__connection, sizeof(ARMCIX_DCMF_Connection_t) * size);

  /* Receives the byte count written back by DCMF_Memregion_create(). */
  size_t bytes = (size_t) -1;

  unsigned i;
  for (i = 0; i < size; i++)
  {
    __connection[i].peer = i;
#warning fix memregion setup to handle non-global address space pinning.
    /* The region is requested from address NULL with the maximum length;
     * the result code of the call is currently not examined. */
    DCMF_Memregion_create (&__connection[i].local_mem_region,
                           &bytes, (size_t) -1, NULL, 0);
  }

  // Register a send protocol to exchange memory regions
  DCMF_Protocol_t send_protocol;
  DCMF_Send_Configuration_t send_configuration = {
    DCMF_DEFAULT_SEND_PROTOCOL,
    DCMF_DEFAULT_NETWORK,
    ARMCIX_DCMF_RecvMemregion1,
    __connection,
    ARMCIX_DCMF_RecvMemregion2,
    __connection
  };
  DCMF_Send_register (&send_protocol, &send_configuration);

  DCMF_Request_t request;
  volatile unsigned active;
  DCMF_Callback_t cb_done = { ARMCIX_DCMF_cb_decrement, (void *) &active };

  // Exchange the memory regions
  __memregions_to_receive = size;
  for (i = 0; i < size; i++)
  {
    /* Start with the local rank and wrap around. */
    unsigned peer = (rank+i)%size;
    active = 1;
    DCMF_Send (&send_protocol,
               &request,
               cb_done,
               DCMF_SEQUENTIAL_CONSISTENCY,
               peer,
               sizeof(DCMF_Memregion_t),
               (char *) &__connection[peer].local_mem_region,
               (DCQuad *) NULL,
               0);
    /* The single request object is reused, so wait for completion first. */
    while (active) DCMF_Messager_advance();
  }
  while (__memregions_to_receive) DCMF_Messager_advance();

  DCMF_CriticalSection_exit(0);
}
Ejemplo n.º 6
0
/*
 * Benchmark: send latency from rank 0 to rank (myrank + 1) % nranks,
 * measured twice per message size. Message sizes double from 1 up to (but
 * not including) MAX_MSG_SIZE_LOCAL.
 *
 * Rank 0 posts ITERATIONS_LOCAL sends per measurement and advances the
 * messager until the completion callback has counted all of them down.
 * Every other rank does nothing for the first measurement (column
 * "Send-Remote Barrier") and, for the second one (column "Send-Remote
 * Sleep"), sleeps for 10 seconds while holding DCMF critical section 0.
 *
 * Results are printed by rank 0 only. The request array is allocated on
 * entry and released before returning; the process is aborted if that
 * allocation fails.
 */
void send_remoteadvance()
{

    DCMF_Request_t *send_req;
    DCMF_Callback_t send_done;
    int done_count;
    unsigned int msgsize, i;
    DCQuad msginfo;

    send_req = (DCMF_Request_t *) malloc(sizeof(DCMF_Request_t)
            * ITERATIONS_LOCAL);
    if (send_req == NULL)
    {
        fprintf(stderr,
                "send_remoteadvance: unable to allocate send requests\n");
        abort();
    }

    send_done.function = done;
    send_done.clientdata = (void *) &done_count;

    if (myrank == 0)
    {
        printf("Send latency in usec\n");
        fflush(stdout);

        printf("%20s  %20s %20s \n",
               "Msg Size",
               "Send-Remote Barrier",
               "Send-Remote Sleep");
        fflush(stdout);
    }

    if (myrank == 0)
    {

        for (msgsize = 1; msgsize < MAX_MSG_SIZE_LOCAL; msgsize *= 2)
        {

            /***********************
             * start timer          *
             ***********************/

            t_start = DCMF_Timebase();

            /* One completion is expected per send posted below. */
            done_count = ITERATIONS_LOCAL;

            for (i = 0; i < ITERATIONS_LOCAL; i++)
            {
                DCMF_Send(&snd_reg,
                          &send_req[i],
                          send_done,
                          DCMF_SEQUENTIAL_CONSISTENCY,
                          (myrank + 1) % nranks,
                          msgsize,
                          source,
                          &msginfo,
                          1);
            }

            while (done_count > 0)
                DCMF_Messager_advance();

            t_stop = DCMF_Timebase();
            t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS_LOCAL);

            /***********************
             * stop timer          *
             ***********************/

            printf("%20u %20.2f ", msgsize, t_usec);
            fflush(stdout);

            barrier();

            /***********************
             * start timer          *
             ***********************/

            t_start = DCMF_Timebase();

            done_count = ITERATIONS_LOCAL;

            for (i = 0; i < ITERATIONS_LOCAL; i++)
            {
                DCMF_Send(&snd_reg,
                          &send_req[i],
                          send_done,
                          DCMF_SEQUENTIAL_CONSISTENCY,
                          (myrank + 1) % nranks,
                          msgsize,
                          source,
                          &msginfo,
                          1);
            }

            while (done_count > 0)
                DCMF_Messager_advance();

            t_stop = DCMF_Timebase();
            t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS_LOCAL);

            /***********************
             * stop timer          *
             ***********************/

            printf("%20.2f \n", t_usec);
            fflush(stdout);

            barrier();

        }

    }
    else
    {

        for (msgsize = 1; msgsize < MAX_MSG_SIZE_LOCAL; msgsize *= 2)
        {

            /* Pairs with rank 0's barrier after its first measurement. */
            barrier();

            /* Stay inside the critical section, without advancing the
             * messager, while rank 0 runs its second measurement. */
            DCMF_CriticalSection_enter(0);

            sleep(10);

            DCMF_CriticalSection_exit(0);

            barrier();
        }

    }

    barrier();

    /* Every send posted above has completed locally by now. */
    free(send_req);
}
Ejemplo n.º 7
0
/*
 * Vector put-accumulate.
 *
 * Issues one DCMF_Send on OSPD_Generic_putacc_protocol for every
 * source/target pointer pair described by iov_ar. Each message carries
 * iov_ar[i].size payload bytes and a two-quad header holding the datatype,
 * the scaling factor and the target address.
 *
 * target       rank the messages are sent to
 * iov_ar       array of ar_len vector descriptors
 * ar_len       number of entries in iov_ar
 * osp_type     element type; selects how *scaling is read
 * scaling      pointer to the scaling factor, of the type named by osp_type
 * ospd_handle  handle whose active count is incremented once per message
 *
 * Returns OSP_SUCCESS when every send was issued. Error conditions are
 * routed through OSPU_ERR_POP (presumably a jump to fn_fail -- the macro is
 * not visible here) and the function then returns the status set at that
 * point.
 */
int OSPDI_Direct_putaccv(int target,
                        OSP_iov_t *iov_ar,
                        int ar_len,
                        OSP_datatype_t osp_type,
                        void *scaling,
                        OSPD_Handle_t *ospd_handle)
{
    int status = OSP_SUCCESS;
    int vec_idx, ptr_idx;
    OSPD_Putacc_header_t header;
    OSPD_Request_t *ospd_request;
    DCMF_Callback_t done_callback;

    OSPU_FUNC_ENTER();

    /* Copy the scaling factor into the header using the caller's type. */
    header.datatype = osp_type;
    switch (osp_type)
    {
        case OSP_INT32:
            (header.scaling).int32_value = *((int32_t *) scaling);
            break;
        case OSP_UINT32:
            (header.scaling).uint32_value = *((uint32_t *) scaling);
            break;
        case OSP_INT64:
            (header.scaling).int64_value = *((int64_t *) scaling);
            break;
        case OSP_UINT64:
            (header.scaling).uint64_value = *((uint64_t *) scaling);
            break;
        case OSP_FLOAT:
            (header.scaling).float_value = *((float *) scaling);
            break;
        case OSP_DOUBLE:
            (header.scaling).double_value = *((double *) scaling);
            break;
        default:
            status = OSP_ERROR;
            OSPU_ERR_POP((status != OSP_SUCCESS),"Invalid data type in putacc \n");
            break;
    }

    /* The completion routine is the same for every message; only its
     * argument changes per request. */
    done_callback.function = OSPDI_Request_done;

    for (vec_idx = 0; vec_idx < ar_len; vec_idx++)
    {
        OSP_iov_t *vec = &iov_ar[vec_idx];

        for (ptr_idx = 0; ptr_idx < vec->ptr_ar_len; ptr_idx++)
        {
            ospd_request = OSPDI_Get_request(1);
            OSPU_ERR_POP(status = (ospd_request == NULL),
                 "OSPDI_Get_request returned error.\n");
            OSPDI_Set_handle(ospd_request, ospd_handle);

            done_callback.clientdata = (void *) ospd_request;

            ospd_handle->active++;

            header.target_ptr = vec->target_ptr_ar[ptr_idx];

            status = DCMF_Send(&OSPD_Generic_putacc_protocol,
                               &(ospd_request->request),
                               done_callback,
                               DCMF_SEQUENTIAL_CONSISTENCY,
                               target,
                               vec->size,
                               vec->source_ptr_ar[ptr_idx],
                               (DCQuad *) &header,
                               (unsigned) 2);
            OSPU_ERR_POP((status != DCMF_SUCCESS), "Putacc returned with an error \n");

            /* Per-target count of sends issued from here. */
            OSPD_Connection_send_active[target]++;
        }
    }

  fn_exit:
    OSPU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Ejemplo n.º 8
0
void send_localvsremote()
{

    DCMF_Request_t send_req[ITERATIONS];
    DCMF_Callback_t send_done, nocallback;
    int done_count;
    unsigned int msgsize, i, dst;
    DCMF_NetworkCoord_t myaddr, dstaddr;
    DCMF_Network ntwk;
    DCQuad msginfo[ITERATIONS];

    DCMF_Messager_rank2network(myrank, DCMF_TORUS_NETWORK, &myaddr);

    dstaddr.network = myaddr.network;
    dstaddr.torus.x = (myaddr.torus.x + 3) % 8;
    dstaddr.torus.y = (myaddr.torus.y + 3) % 8;
    dstaddr.torus.z = (myaddr.torus.z + 3) % 8;
    dstaddr.torus.t = myaddr.torus.t;

    DCMF_Messager_network2rank(&dstaddr, &dst, &ntwk);

    send_done.function = done;
    send_done.clientdata = (void *) &done_count;
    nocallback.function = NULL;
    nocallback.clientdata = NULL;

    if (myrank == 0)
    {
        printf("Send call overhead in usec\n");
        fflush(stdout);
    }

    if (myrank == 0)
    {
        char buffer[100];
        sprintf(buffer,
                "%20s  %20s %20s",
                "Msg Size",
                "Farthest pairs",
                "Closest pairs");
        printf("%s \n", buffer);
        fflush(stdout);
    }

    for (msgsize = 1; msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {

        /***********************
         * warmup               *
         ***********************/
        snd_rcv_active += SKIP;
        done_count += SKIP;
        for (i = 0; i < SKIP; i++)
        {
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
        }
        while (done_count || snd_rcv_active)
            DCMF_Messager_advance();

        t_avg = 0;
        t_avg1 = 0, t_avg2 = 0;
        target_index = 0;
        barrier();

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
        }

        t_stop = DCMF_Timebase();
        t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec,
                  (char *) &t_avg,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
        }

        t_stop = DCMF_Timebase();
        t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec1,
                  (char *) &t_avg1,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();

        if (myrank == 0)
        {
            t_avg = t_avg / nranks;
            t_avg1 = t_avg1 / nranks;
            printf("%20d %20.2f %20.2f \n", msgsize, t_avg, t_avg1);
            fflush(stdout);
        }
    }

    if (myrank == 0)
    {
        printf("Send latency in usec with local vs remote completion \n");
        fflush(stdout);
    }

    if (myrank == 0)
    {
        char buffer[100];
        sprintf(buffer,
                "%20s  %20s  %20s  %20s  %20s %20s  %20s",
                "Msg Size",
                "Farthest pairs-local",
                "Farthest pairs-remote",
                "Farthest pairs-both",
                "Closest pairs-local",
                "Closest pairs-remote",
                "Closest pairs-both");
        printf("%s \n", buffer);
        fflush(stdout);
    }

    barrier();

    for (msgsize = 1; msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {

        /***********************
         * start timer          *
         ***********************/

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec,
                  (char *) &t_avg,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        barrier();
        allreduce(-1,
                  (char *) &t_usec1,
                  (char *) &t_avg1,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count || ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec2 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        /***********************
         * stop timer          *
         ***********************/

        barrier();
        allreduce(-1,
                  (char *) &t_usec2,
                  (char *) &t_avg2,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();

        if (myrank == 0)
        {
            t_avg = t_avg / nranks;
            t_avg1 = t_avg1 / nranks;
            t_avg2 = t_avg2 / nranks;
            printf("%20d %20.2f %20.2f %20.2f", msgsize, t_avg, t_avg1, t_avg2);
            fflush(stdout);
        }

        t_avg = 0;
        t_avg1 = 0, t_avg2 = 0;
        target_index = 0;

        barrier();

        /***********************
         * start timer          *
         ***********************/

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec,
                  (char *) &t_avg,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        barrier();
        allreduce(-1,
                  (char *) &t_usec1,
                  (char *) &t_avg1,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count || ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec2 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        /***********************
         * stop timer          *
         ***********************/

        allreduce(-1,
                  (char *) &t_usec2,
                  (char *) &t_avg2,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();

        if (myrank == 0)
        {
            t_avg = t_avg / nranks;
            t_avg1 = t_avg1 / nranks;
            t_avg2 = t_avg2 / nranks;
            printf("%20.2f %20.2f %20.2f \n", t_avg, t_avg1, t_avg2);
            fflush(stdout);
        }

    }
}
Ejemplo n.º 9
0
/*
 * Thread entry point for the message-rate benchmark.
 *
 * threadid carries the thread's index as an integer cast to a pointer. The
 * thread acquires channel (base + index), posts LOCAL_ITERATIONS sends of
 * 512 bytes to rank (index + 1), advances its channel until the completion
 * callback has counted every send down, and prints the resulting injection
 * rate in MB/s. Thread 0 also prints the table header.
 *
 * The timing variables t_start, t_stop and t_sec are not declared in this
 * function.
 * NOTE(review): if they are globals shared by all benchmark threads, the
 * threads overwrite each other's timestamps -- confirm.
 *
 * Always returns NULL. The process is aborted if the request array cannot
 * be allocated, since returning early would leave the other threads
 * waiting at the barrier.
 */
void *mrate_test(void *threadid)
{

    unsigned i, channel, base, count, msgsize;
    DCMF_Protocol_t protocol;
    DCMF_Callback_t cb_done;
    uint32_t done_count;
    DCMF_Request_t *req;
    DCMF_Messager_advance_options adv_options;
    DCQuad msginfo;

    req = (DCMF_Request_t *) malloc(sizeof(DCMF_Request_t) * LOCAL_ITERATIONS);
    if (req == NULL)
    {
        fprintf(stderr, "mrate_test: unable to allocate send requests\n");
        abort();
    }

    cb_done.function = done;
    cb_done.clientdata = (void *) &done_count;

    DCMF_Channel_info(&protocol, &base, &count, &channel);
    DCMF_Channel_acquire(base + (long) threadid);

    msgsize = 512;

    if ((long) threadid == 0)
    {
        printf("%10s %20s %30s \n",
               "Thread ID",
               "Message Size",
               "Injection Rate (MBps)");
        fflush(stdout);
    }

    pthread_barrier_wait(&ptbarrier);

    t_start = DCMF_Timebase();

    /* One completion is expected per send posted below. */
    done_count = LOCAL_ITERATIONS;
    for (i = 0; i < LOCAL_ITERATIONS; i++)
    {

        DCMF_Send(&snd_reg,
                  &req[i],
                  cb_done,
                  DCMF_SEQUENTIAL_CONSISTENCY,
                  (long) threadid + 1,
                  msgsize,
                  source,
                  &msginfo,
                  1);

    }

    /* Only the channel field of the advance options is set here. */
    adv_options.channel = base + (long) threadid;
    while (done_count > 0)
        DCMF_Messager_advance_expert(adv_options);

    t_stop = DCMF_Timebase();
    t_sec = (t_stop - t_start) / (clockMHz * 1000000);
    printf("%10ld %20u %26.4f \n",
           (long) threadid,
           msgsize,
           ((double) LOCAL_ITERATIONS * msgsize) / (t_sec * (double) 1024
                   * 1024));
    fflush(stdout);

    pthread_barrier_wait(&ptbarrier1);

    DCMF_Channel_release();

    /* Every send posted above has completed locally by now. */
    free(req);

    return NULL;
}