Ejemplo n.º 1
0
/**
 * \brief Global fence operation.
 *
 * Blocks until all active messages between the local node and all remote
 * nodes have completed and acknowledged by the remote node.
 *
 * \see ARMCIX_Fence
 * \see ARMCIX_DCMF_ReceiveFenceRequest
 * \see ARMCIX_DCMF_ReceiveFenceAck
 */
void ARMCIX_AllFence ()
{
    DCMF_CriticalSection_enter (0);

    unsigned size = DCMF_Messager_size ();
    unsigned peer;

    volatile unsigned active = 0;
    DCQuad quad;
    DCMF_Callback_t * cb = (DCMF_Callback_t *) &quad;
    cb->function   = ARMCIX_DCMF_cb_decrement;
    cb->clientdata = (void *) &active;

    DCMF_Callback_t cb_null = { NULL, NULL };
    DCMF_Callback_t cb_done = { (void (*)(void *, DCMF_Error_t *))ARMCIX_DCMF_request_free, NULL };
    for (peer = 0; peer < size; peer++)
    {
        ARMCIX_DCMF_Request_t * new_request = ARMCIX_DCMF_request_allocate (cb_null);
        cb_done.clientdata = new_request;

        active++;
        DCMF_Send ( &__fence_rts_protocol,
                    &(new_request->request),
                    cb_done,
                    DCMF_SEQUENTIAL_CONSISTENCY,
                    peer,
                    0,
                    NULL,
                    (DCQuad *) &quad,
                    1);

        while (active) DCMF_Messager_advance ();
    }

    DCMF_CriticalSection_exit (0);
}
Ejemplo n.º 2
0
/*
 * Test driver for DCMF_Allreduce.
 *
 * Registers two barrier protocols and two allreduce protocols (tree and
 * torus binomial), builds a geometry spanning every rank, and then, for each
 * power-of-two message size below MAX_MSG_SIZE, runs
 *   - a DCMF_SUM allreduce of the rank ids over the tree protocol, and
 *   - a DCMF_PROD allreduce of all-ones over the torus binomial protocol,
 * validating every element of the result.
 *
 * Returns 0 on success; exits with -1 on an allocation or validation failure.
 */
int main()
{

    int i, rank, nranks, msgsize, status, expected;
    long bufsize;
    int *src_buffer;
    int *trg_buffer;
    unsigned *ranks;
    DCMF_Result dcmf_result;
    DCMF_CollectiveProtocol_t barrier_protocol, lbarrier_protocol;
    DCMF_CollectiveProtocol_t allreduce_protocol, allreduce_notree_protocol;
    DCMF_Barrier_Configuration_t barrier_conf;
    DCMF_Allreduce_Configuration_t allreduce_conf;
    DCMF_CollectiveRequest_t crequest, crequest1, crequest2;
    DCMF_Callback_t done_callback;
    /* Outstanding allreduce operations; the completion callback receives a
     * pointer to this counter as its clientdata. */
    volatile unsigned allreduce_active = 0;

    DCMF_Messager_initialize();

    dcmf_result = DCMF_Collective_initialize();
    assert(dcmf_result == DCMF_SUCCESS);

    rank = DCMF_Messager_rank();
    nranks = DCMF_Messager_size();

    bufsize = MAX_MSG_SIZE;
    ranks = (unsigned *) malloc(nranks * sizeof(*ranks));
    src_buffer = (int *) malloc(bufsize);
    trg_buffer = (int *) malloc(bufsize);

    /* Every buffer is dereferenced unconditionally below, so bail out early
     * rather than crash on a NULL pointer. */
    if (ranks == NULL || src_buffer == NULL || trg_buffer == NULL)
    {
        printf("[%d] Buffer allocation failed \n", rank);
        fflush(stdout);
        exit(-1);
    }

    /* The geometry covers all ranks in natural order. */
    for(i=0; i<nranks; i++) ranks[i] = i;

    barrier_conf.protocol = DCMF_GI_BARRIER_PROTOCOL;
    barrier_conf.cb_geometry = getGeometry;
    dcmf_result = DCMF_Barrier_register(&barrier_protocol, &barrier_conf);
    assert(dcmf_result == DCMF_SUCCESS);

    barrier_conf.protocol = DCMF_LOCKBOX_BARRIER_PROTOCOL;
    barrier_conf.cb_geometry = getGeometry;
    dcmf_result = DCMF_Barrier_register(&lbarrier_protocol, &barrier_conf);
    assert(dcmf_result == DCMF_SUCCESS);

    DCMF_CollectiveProtocol_t  *barrier_ptr, *lbarrier_ptr;
    barrier_ptr = &barrier_protocol;
    lbarrier_ptr  = &lbarrier_protocol;
    dcmf_result = DCMF_Geometry_initialize(&geometry,
                                           0,
                                           ranks,
                                           nranks,
                                           &barrier_ptr,
                                           1,
                                           &lbarrier_ptr,
                                           1,
                                           &crequest,
                                           0,
                                           1);
    assert(dcmf_result == DCMF_SUCCESS);

    allreduce_conf.protocol = DCMF_TREE_ALLREDUCE_PROTOCOL;
    allreduce_conf.cb_geometry = getGeometry;
    allreduce_conf.reuse_storage = 1;
    dcmf_result = DCMF_Allreduce_register(&allreduce_protocol, &allreduce_conf);
    assert(dcmf_result == DCMF_SUCCESS);

    allreduce_conf.protocol = DCMF_TORUS_BINOMIAL_ALLREDUCE_PROTOCOL;
    allreduce_conf.cb_geometry = getGeometry;
    allreduce_conf.reuse_storage = 1;
    dcmf_result = DCMF_Allreduce_register(&allreduce_notree_protocol, &allreduce_conf);
    assert(dcmf_result == DCMF_SUCCESS);

    status = DCMF_Geometry_analyze(&geometry, &allreduce_protocol);
    assert(status == 1);

    status = DCMF_Geometry_analyze(&geometry, &allreduce_notree_protocol);
    assert(status == 1);

    done_callback.function = done;
    done_callback.clientdata = (void *) &allreduce_active;

    if (rank == 0)
    {
        printf("DCMF_Allreduce Test\n");
        fflush(stdout);
    }

    for (msgsize = sizeof(int); msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {
        /*initializing buffer*/
        for (i = 0; i < bufsize/sizeof(int); i++)
        {
            src_buffer[i] = rank;
            trg_buffer[i] = 0;
        }

        allreduce_active += 1;

        /*sum reduce operation*/
        dcmf_result = DCMF_Allreduce(&allreduce_protocol,
                                     &crequest1,
                                     done_callback,
                                     DCMF_SEQUENTIAL_CONSISTENCY,
                                     &geometry,
                                     (char *) src_buffer,
                                     (char *) trg_buffer,
                                     msgsize/sizeof(int),
                                     DCMF_SIGNED_INT,
                                     DCMF_SUM);
        assert(dcmf_result == DCMF_SUCCESS);

        while(allreduce_active > 0) DCMF_Messager_advance();

        /* Sum of the rank ids 0 .. nranks-1. */
        expected = (nranks-1)*(nranks)/2;
        for (i = 0; i < msgsize/sizeof(int); i++)
        {
            if(trg_buffer[i] - expected != 0)
            {
                printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n",
                       rank, expected, trg_buffer[i], i);
                fflush(stdout);
                exit(-1);
            }
        }

        printf("[%d] %d message sum allreduce successful \n", rank, msgsize);
        fflush(stdout);

        for (i = 0; i < bufsize/sizeof(int); i++)
        {
            src_buffer[i] = 1;
            trg_buffer[i] = 0;
        }

        allreduce_active += 1;

        /*product reduce operation*/
        dcmf_result = DCMF_Allreduce(&allreduce_notree_protocol,
                                     &crequest2,
                                     done_callback,
                                     DCMF_SEQUENTIAL_CONSISTENCY,
                                     &geometry,
                                     (char *) src_buffer,
                                     (char *) trg_buffer,
                                     msgsize/sizeof(int),
                                     DCMF_SIGNED_INT,
                                     DCMF_PROD);
        assert(dcmf_result == DCMF_SUCCESS);

        while(allreduce_active > 0) DCMF_Messager_advance();

        /* Product of ones is one regardless of the number of ranks. */
        expected = 1;
        for (i = 0; i < msgsize/sizeof(int); i++)
        {
            if(trg_buffer[i] - expected != 0)
            {
                printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n",
                       rank, expected, trg_buffer[i], i);
                fflush(stdout);
                exit(-1);
            }
        }
        printf("[%d] %d message product allreduce successful\n", rank, msgsize);
        fflush(stdout);
    }

    free(src_buffer);
    free(trg_buffer);

    DCMF_Messager_finalize();

    /* Released only after finalize: the geometry was handed this array and
     * may still reference it while the messager is alive. */
    free(ranks);

    return 0;
}
Ejemplo n.º 3
0
/*
 * Get-bandwidth benchmark under contention.
 *
 * For every message size and every torus displacement in the tables below,
 * each rank issues ITERATIONS DCMF_Get operations against the rank located
 * at (own coordinates + displacement) modulo the torus dimensions, times
 * them, and the per-rank bandwidths are summed with allreduce(); rank 0
 * prints the average.
 *
 * Relies on file-level state defined elsewhere (myrank, nranks, memregion,
 * get_reg, clockMHz, t_start, t_stop, t_sec, bw, bw_avg, barrier, allreduce).
 */
void get_contention()
{

    unsigned int iter, size, dst;
    unsigned int i, s;
    unsigned int xdim, ydim, zdim;
    unsigned int xdisp, ydisp, zdisp;
    DCMF_Request_t get_req[ITERATIONS];
    DCMF_Callback_t get_done;
    unsigned int done_count;
    DCMF_NetworkCoord_t myaddr, dstaddr;
    DCMF_Network ntwk;
    char buf[50];

    get_done.function = done;
    get_done.clientdata = (void *) &done_count;

    /* The torus extents are derived from the coordinates of the last rank;
     * presumably that rank sits at the far corner - TODO confirm. */
    DCMF_Messager_rank2network(nranks - 1, DCMF_TORUS_NETWORK, &dstaddr);
    xdim = dstaddr.torus.x + 1;
    ydim = dstaddr.torus.y + 1;
    zdim = dstaddr.torus.z + 1;

    if (myrank == 0)
    {
        printf("Dimensions of Torus : %u, %u, %u \n", xdim, ydim, zdim);
        fflush(stdout);
    }

    DCMF_Messager_rank2network(myrank, DCMF_TORUS_NETWORK, &myaddr);
    dstaddr.network = myaddr.network;
    dstaddr.torus.t = myaddr.torus.t;

    int size_array[] = { 8, 64, 512, 4096, 32768, 262144, 1048576 };
    unsigned int size_count = sizeof(size_array) / sizeof(size_array[0]);

    int disp_array[][3] = { { 0, 0, 1 }, { 0, 0, 3 }, { 0, 3, 3 },
                             { 3, 3, 3 }, { 0, 1, 3 }, { 1, 1, 3 },
                             { 0, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 },
                             { 1, 3, 3 }, { 2, 3, 3 } };
    unsigned int disp_count = sizeof(disp_array) / sizeof(disp_array[0]);

    for (s = 0; s < size_count; s++)
    {
        size = size_array[s];

        if (myrank == 0)
        {
            /* NOTE(review): the column is labelled Mbps, but the value
             * computed below is bytes / (seconds * 1024 * 1024). */
            printf("Message Size : %20u \n", size);
            printf("%30s  %20s \n",
                   "Displacement b/w Pairs",
                   "Avg Bandwidth (Mbps)");
            fflush(stdout);
        }

        /*Assumes all dimensions are equal*/
        for (i = 0; i < disp_count; i++)
        {
            xdisp = disp_array[i][0];
            ydisp = disp_array[i][1];
            zdisp = disp_array[i][2];

            /* Wrap around the torus in each dimension. */
            dstaddr.torus.x = (myaddr.torus.x + xdisp) % xdim;
            dstaddr.torus.y = (myaddr.torus.y + ydisp) % ydim;
            dstaddr.torus.z = (myaddr.torus.z + zdisp) % zdim;

            DCMF_Messager_network2rank(&dstaddr, &dst, &ntwk);

            barrier();

            /***********************
             * start timer          *
             ***********************/
            t_start = DCMF_Timebase();

            done_count = ITERATIONS;
            for (iter = 0; iter < ITERATIONS; iter++)
            {
                DCMF_Get(&get_reg,
                         &get_req[iter],
                         get_done,
                         DCMF_SEQUENTIAL_CONSISTENCY,
                         dst,
                         size,
                         memregion[dst],
                         memregion[myrank],
                         MAX_MSG_SIZE * ITERATIONS + iter * size,
                         iter * size);
            }
            /* Wait until the completion callback has drained the counter. */
            while (done_count)
                DCMF_Messager_advance();

            t_stop = DCMF_Timebase();
            /***********************
             * stop timer          *
             ***********************/
            t_sec = (t_stop - t_start) / (clockMHz * 1000000);
            bw = (ITERATIONS * size) / (t_sec * 1024 * 1024);

            barrier();
            allreduce(-1,
                      (char *) &bw,
                      (char *) &bw_avg,
                      1,
                      DCMF_DOUBLE,
                      DCMF_SUM);

            if (myrank == 0)
            {
                bw_avg = bw_avg / nranks;
                snprintf(buf, sizeof(buf), "(%u)(%u)(%u)", xdisp, ydisp, zdisp);
                printf("%30s %20.0f \n", buf, bw_avg);
                fflush(stdout);
            }
        }

    }
}
Ejemplo n.º 4
0
/*
 * Test driver for DCMF_GlobalAllreduce.
 *
 * For each power-of-two message size below MAX_MSG_SIZE, performs an
 * in-place DCMF_SUM reduction of the rank ids followed by an in-place
 * DCMF_PROD reduction of all-ones, and validates every element.
 *
 * Returns 0 on success; exits with -1 when allocation, registration, a
 * reduction call, or validation fails.
 */
int main()
{

    int i, rank, nranks, msgsize, status, expected;
    long bufsize;
    int *buffer;
    DCMF_Protocol_t ga_protocol;
    DCMF_GlobalAllreduce_Configuration_t ga_conf;
    DCMF_Request_t request;
    DCMF_Callback_t done_callback;
    /* Outstanding reductions; the completion callback receives a pointer to
     * this counter as its clientdata. */
    volatile unsigned ga_active = 0;

    DCMF_Messager_initialize();

    rank = DCMF_Messager_rank();
    nranks = DCMF_Messager_size();

    bufsize = MAX_MSG_SIZE;
    buffer = (int *) malloc(bufsize);
    if (buffer == NULL)
    {
        printf("[%d] Buffer allocation failed \n", rank);
        fflush(stdout);
        exit(-1);
    }

    ga_conf.protocol = DCMF_DEFAULT_GLOBALALLREDUCE_PROTOCOL;
    status = DCMF_GlobalAllreduce_register(&ga_protocol,
                                           &ga_conf);
    if(status != DCMF_SUCCESS)
    {
       printf("DCMF_GlobalAllreduce_register returned with error %d \n",
                 status);
       exit(-1);
    }

    done_callback.function = done;
    done_callback.clientdata = (void *) &ga_active;

    if (rank == 0)
    {
        printf("DCMF_Allreduce Test\n");
        fflush(stdout);
    }

    for (msgsize = sizeof(int); msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {
            /*initializing buffer*/
            for (i = 0; i < bufsize/sizeof(int); i++)
            {
                 buffer[i] = rank;
            }

            ga_active += 1;

            /*sum reduce operation*/
            status = DCMF_GlobalAllreduce(&ga_protocol,
                                          &request,
                                          done_callback,
                                          DCMF_SEQUENTIAL_CONSISTENCY,
                                          -1,
                                          (char *) buffer,
                                          (char *) buffer,
                                          msgsize/sizeof(int),
                                          DCMF_SIGNED_INT,
                                          DCMF_SUM);
            /* A rejected call never triggers the completion callback, so the
             * wait loop below would spin forever. */
            if(status != DCMF_SUCCESS)
            {
               printf("DCMF_GlobalAllreduce returned with error %d \n",
                         status);
               fflush(stdout);
               exit(-1);
            }

             while(ga_active > 0) DCMF_Messager_advance();

             /* Sum of the rank ids 0 .. nranks-1. */
             expected = (nranks-1)*(nranks)/2;
             for (i = 0; i < msgsize/sizeof(int); i++)
             {
                if(buffer[i] - expected != 0)
                {
                   printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n",
                               rank, expected, buffer[i], i);
                   fflush(stdout);
                   exit(-1);
                }
             }

             printf("[%d] %d message sum reduce successful \n", rank, msgsize);
             fflush(stdout);

             for (i = 0; i < bufsize/sizeof(int); i++)
             {
                   buffer[i] = 1;
             }

            ga_active += 1;

            /*product reduce operation*/
            status = DCMF_GlobalAllreduce(&ga_protocol,
                                          &request,
                                          done_callback,
                                          DCMF_SEQUENTIAL_CONSISTENCY,
                                          -1,
                                          (char *) buffer,
                                          (char *) buffer,
                                          msgsize/sizeof(int),
                                          DCMF_SIGNED_INT,
                                          DCMF_PROD);
            if(status != DCMF_SUCCESS)
            {
               printf("DCMF_GlobalAllreduce returned with error %d \n",
                         status);
               fflush(stdout);
               exit(-1);
            }

             while(ga_active > 0) DCMF_Messager_advance();

             /* Product of ones is one regardless of the number of ranks. */
             expected = 1;
             for (i = 0; i < msgsize/sizeof(int); i++)
             {
                if(buffer[i] - expected != 0)
                {
                    printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n",
                                rank, expected, buffer[i], i);
                    fflush(stdout);
                    exit(-1);
                }
             }

             printf("[%d] %d message product reduce successful\n", rank, msgsize);
             fflush(stdout);

    }

    free(buffer);
    DCMF_Messager_finalize();

    return 0;
}
Ejemplo n.º 5
0
/**
 * \brief Set up the per-peer connection table and exchange memory regions.
 *
 * Allocates and zeroes one ARMCIX_DCMF_Connection_t per rank, creates a local
 * memory region for each entry, registers a send protocol whose receive
 * handlers are ARMCIX_DCMF_RecvMemregion1 / ARMCIX_DCMF_RecvMemregion2, and
 * sends the local memory region of every entry to the corresponding peer.
 * Returns once all sends have completed locally and
 * __memregions_to_receive has dropped to zero.
 *
 * The whole operation runs inside DCMF critical section 0. The process is
 * aborted if the connection table cannot be allocated.
 */
void ARMCIX_DCMF_Connection_initialize ()
{
  DCMF_CriticalSection_enter(0);

  __global_connection.peer = (unsigned) -1;

  unsigned rank = DCMF_Messager_rank ();
  unsigned size = DCMF_Messager_size ();

  // posix_memalign reports failure through its return value and leaves the
  // pointer unspecified; nothing below can work without the table.
  if (posix_memalign ((void **)&__connection, 16, sizeof(ARMCIX_DCMF_Connection_t) * size) != 0)
    abort ();
  bzero ((void *)__connection, sizeof(ARMCIX_DCMF_Connection_t) * size);

  // Receives the size reported back by DCMF_Memregion_create.
  size_t bytes = (size_t) -1;

  unsigned i;
  for (i = 0; i < size; i++)
  {
    __connection[i].peer = i;
#warning fix memregion setup to handle non-global address space pinning.
    // The DCMF_Result of the call is currently ignored.
    DCMF_Memregion_create (&__connection[i].local_mem_region,
                           &bytes, (size_t) -1, NULL, 0);
  }

  // Register a send protocol to exchange memory regions
  DCMF_Protocol_t send_protocol;
  DCMF_Send_Configuration_t send_configuration = {
    DCMF_DEFAULT_SEND_PROTOCOL,
    DCMF_DEFAULT_NETWORK,
    ARMCIX_DCMF_RecvMemregion1,
    __connection,
    ARMCIX_DCMF_RecvMemregion2,
    __connection
  };
  DCMF_Send_register (&send_protocol, &send_configuration);

  DCMF_Request_t request;
  volatile unsigned active;
  DCMF_Callback_t cb_done = { ARMCIX_DCMF_cb_decrement, (void *) &active };

  // Exchange the memory regions
  __memregions_to_receive = size;
  for (i = 0; i < size; i++)
  {
    // Start with the local rank and walk the ranks in a ring.
    unsigned peer = (rank+i)%size;
    active = 1;
    DCMF_Send (&send_protocol,
               &request,
               cb_done,
               DCMF_SEQUENTIAL_CONSISTENCY,
               peer,
               sizeof(DCMF_Memregion_t),
               (char *) &__connection[peer].local_mem_region,
               (DCQuad *) NULL,
               0);
    // The single request object is reused, so wait for local completion
    // before posting the next send.
    while (active) DCMF_Messager_advance();
  }
  // Wait until a memory region has arrived from every rank.
  while (__memregions_to_receive) DCMF_Messager_advance();

  DCMF_CriticalSection_exit(0);
}
Ejemplo n.º 6
0
/*
 * Put latency benchmark comparing fresh DCMF_Put calls with DCMF_Restart.
 *
 * Rank 0 puts to rank 1 for every power-of-two message size below
 * MAX_MSG_SIZE: SKIP warm-up puts, then ITERATIONS timed puts, then the same
 * ITERATIONS requests re-issued with DCMF_Restart. Each phase waits on the
 * acknowledgement counter. All other ranks only take part in the two
 * barriers.
 *
 * Relies on file-level state defined elsewhere (myrank, memregion, put_reg,
 * clockMHz, t_start, t_stop, t_usec, barrier, done).
 */
void put_restart()
{

    if (myrank == 0)
    {

        DCMF_Request_t put_req[ITERATIONS + SKIP];
        DCMF_Callback_t put_done, put_ack;
        /* done_count is never waited on, but its address is handed to the
         * local-completion callback, so give it a defined starting value. */
        int done_count = 0, ack_count = 0;
        int msgsize, i;

        put_done.function = done;
        put_done.clientdata = (void *) &done_count;
        put_ack.function = done;
        put_ack.clientdata = (void *) &ack_count;

        /* Printed directly: the formatted header is 63 characters long
         * ("Restart-latency(usec)" exceeds its 20-column field), which
         * overflowed the 50-byte scratch buffer used previously. */
        printf("%20s %20s %20s \n",
               "Msg Size",
               "Latency(usec)",
               "Restart-latency(usec)");
        fflush(stdout);

        barrier();

        for (msgsize = 1; msgsize < MAX_MSG_SIZE; msgsize *= 2)
        {

            /***********************
             * warmup               *
             ***********************/
            ack_count = SKIP;
            for (i = 0; i < SKIP; i++)
            {
                DCMF_Put(&put_reg,
                         &put_req[i],
                         put_done,
                         DCMF_SEQUENTIAL_CONSISTENCY,
                         1,
                         msgsize,
                         memregion[myrank],
                         memregion[myrank + 1],
                         i * msgsize,
                         i * msgsize,
                         put_ack);
            }
            while (ack_count)
                DCMF_Messager_advance();

            /***********************
             * start timer          *
             ***********************/
            t_start = DCMF_Timebase();
            ack_count = ITERATIONS;

            for (i = SKIP; i < ITERATIONS + SKIP; i++)
            {
                DCMF_Put(&put_reg,
                         &put_req[i],
                         put_done,
                         DCMF_SEQUENTIAL_CONSISTENCY,
                         1,
                         msgsize,
                         memregion[myrank],
                         memregion[myrank + 1],
                         i * msgsize,
                         i * msgsize,
                         put_ack);
            }

            while (ack_count)
                DCMF_Messager_advance();
            t_stop = DCMF_Timebase();
            /***********************
             * stop timer          *
             ***********************/

            t_usec = ((t_stop - t_start) / clockMHz);
            printf("%20d %20.0f ", msgsize, t_usec / (ITERATIONS));

            /***********************
             * start timer          *
             ***********************/
            t_start = DCMF_Timebase();
            ack_count = ITERATIONS;

            /* Re-issue the requests that were just used for the timed puts. */
            for (i = SKIP; i < ITERATIONS + SKIP; i++)
            {
                DCMF_Restart(&put_req[i]);
            }

            while (ack_count)
                DCMF_Messager_advance();
            t_stop = DCMF_Timebase();
            /***********************
             * stop timer          *
             ***********************/

            t_usec = ((t_stop - t_start) / clockMHz);
            printf("%20.0f\n", t_usec / (ITERATIONS));

        }

        barrier();

    }
    else
    {

        /* Match the two barriers executed by rank 0. */
        barrier();

        barrier();

    }
}
Ejemplo n.º 7
0
/*
 * Send latency benchmark measuring the effect of the remote side's progress.
 *
 * For every power-of-two message size below MAX_MSG_SIZE_LOCAL, rank 0 times
 * ITERATIONS_LOCAL sends to rank (myrank + 1) % nranks twice: once before
 * the first barrier of the round and once after it, while the other ranks
 * hold DCMF critical section 0 and sleep for 10 seconds. Rank 0 prints both
 * per-message latencies.
 *
 * Relies on file-level state defined elsewhere (myrank, nranks, snd_reg,
 * source, clockMHz, t_start, t_stop, t_usec, barrier, done).
 */
void send_remoteadvance()
{

    DCMF_Request_t *send_req;
    DCMF_Callback_t send_done;
    int done_count = 0;
    unsigned int msgsize, i;
    /* NOTE(review): the header quad is sent without being filled in. */
    DCQuad msginfo;

    send_req = (DCMF_Request_t *) malloc(sizeof(DCMF_Request_t)
            * ITERATIONS_LOCAL);
    if (send_req == NULL)
    {
        /* Returning would leave the other ranks stuck in a barrier. */
        printf("[%d] Request allocation failed \n", myrank);
        fflush(stdout);
        exit(-1);
    }

    send_done.function = done;
    send_done.clientdata = (void *) &done_count;

    if (myrank == 0)
    {
        printf("Send latency in usec\n");
        fflush(stdout);
    }

    if (myrank == 0)
    {
        printf("%20s  %20s %20s \n",
               "Msg Size",
               "Send-Remote Barrier",
               "Send-Remote Sleep");
        fflush(stdout);
    }

    if (myrank == 0)
    {

        for (msgsize = 1; msgsize < MAX_MSG_SIZE_LOCAL; msgsize *= 2)
        {

            /***********************
             * start timer          *
             ***********************/

            t_start = DCMF_Timebase();

            /* One completion is expected per send posted below. */
            done_count = ITERATIONS_LOCAL;

            for (i = 0; i < ITERATIONS_LOCAL; i++)
            {
                DCMF_Send(&snd_reg,
                          &send_req[i],
                          send_done,
                          DCMF_SEQUENTIAL_CONSISTENCY,
                          (myrank + 1) % nranks,
                          msgsize,
                          source,
                          &msginfo,
                          1);
            }

            while (done_count > 0)
                DCMF_Messager_advance();

            t_stop = DCMF_Timebase();
            t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS_LOCAL);

            /***********************
             * stop timer          *
             ***********************/

            if (myrank == 0)
            {
                printf("%20u %20.2f ", msgsize, t_usec);
                fflush(stdout);
            }

            barrier();

            /***********************
             * start timer          *
             ***********************/

            t_start = DCMF_Timebase();

            done_count = ITERATIONS_LOCAL;

            for (i = 0; i < ITERATIONS_LOCAL; i++)
            {
                DCMF_Send(&snd_reg,
                          &send_req[i],
                          send_done,
                          DCMF_SEQUENTIAL_CONSISTENCY,
                          (myrank + 1) % nranks,
                          msgsize,
                          source,
                          &msginfo,
                          1);
            }

            while (done_count > 0)
                DCMF_Messager_advance();

            t_stop = DCMF_Timebase();
            t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS_LOCAL);

            /***********************
             * stop timer          *
             ***********************/

            if (myrank == 0)
            {
                printf("%20.2f \n", t_usec);
                fflush(stdout);
            }

            barrier();

        }

    }
    else
    {

        for (msgsize = 1; msgsize < MAX_MSG_SIZE_LOCAL; msgsize *= 2)
        {

            barrier();

            /* Sleep while holding the critical section during rank 0's
             * second timed phase. */
            DCMF_CriticalSection_enter(0);

            sleep(10);

            DCMF_CriticalSection_exit(0);

            barrier();
        }

    }

    barrier();

    free(send_req);
}
Ejemplo n.º 8
0
void send_localvsremote()
{

    DCMF_Request_t send_req[ITERATIONS];
    DCMF_Callback_t send_done, nocallback;
    int done_count;
    unsigned int msgsize, i, dst;
    DCMF_NetworkCoord_t myaddr, dstaddr;
    DCMF_Network ntwk;
    DCQuad msginfo[ITERATIONS];

    DCMF_Messager_rank2network(myrank, DCMF_TORUS_NETWORK, &myaddr);

    dstaddr.network = myaddr.network;
    dstaddr.torus.x = (myaddr.torus.x + 3) % 8;
    dstaddr.torus.y = (myaddr.torus.y + 3) % 8;
    dstaddr.torus.z = (myaddr.torus.z + 3) % 8;
    dstaddr.torus.t = myaddr.torus.t;

    DCMF_Messager_network2rank(&dstaddr, &dst, &ntwk);

    send_done.function = done;
    send_done.clientdata = (void *) &done_count;
    nocallback.function = NULL;
    nocallback.clientdata = NULL;

    if (myrank == 0)
    {
        printf("Send call overhead in usec\n");
        fflush(stdout);
    }

    if (myrank == 0)
    {
        char buffer[100];
        sprintf(buffer,
                "%20s  %20s %20s",
                "Msg Size",
                "Farthest pairs",
                "Closest pairs");
        printf("%s \n", buffer);
        fflush(stdout);
    }

    for (msgsize = 1; msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {

        /***********************
         * warmup               *
         ***********************/
        snd_rcv_active += SKIP;
        done_count += SKIP;
        for (i = 0; i < SKIP; i++)
        {
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
        }
        while (done_count || snd_rcv_active)
            DCMF_Messager_advance();

        t_avg = 0;
        t_avg1 = 0, t_avg2 = 0;
        target_index = 0;
        barrier();

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
        }

        t_stop = DCMF_Timebase();
        t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec,
                  (char *) &t_avg,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
        }

        t_stop = DCMF_Timebase();
        t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec1,
                  (char *) &t_avg1,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();

        if (myrank == 0)
        {
            t_avg = t_avg / nranks;
            t_avg1 = t_avg1 / nranks;
            printf("%20d %20.2f %20.2f \n", msgsize, t_avg, t_avg1);
            fflush(stdout);
        }
    }

    if (myrank == 0)
    {
        printf("Send latency in usec with local vs remote completion \n");
        fflush(stdout);
    }

    if (myrank == 0)
    {
        char buffer[100];
        sprintf(buffer,
                "%20s  %20s  %20s  %20s  %20s %20s  %20s",
                "Msg Size",
                "Farthest pairs-local",
                "Farthest pairs-remote",
                "Farthest pairs-both",
                "Closest pairs-local",
                "Closest pairs-remote",
                "Closest pairs-both");
        printf("%s \n", buffer);
        fflush(stdout);
    }

    barrier();

    for (msgsize = 1; msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {

        /***********************
         * start timer          *
         ***********************/

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec,
                  (char *) &t_avg,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        barrier();
        allreduce(-1,
                  (char *) &t_usec1,
                  (char *) &t_avg1,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count || ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec2 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        /***********************
         * stop timer          *
         ***********************/

        barrier();
        allreduce(-1,
                  (char *) &t_usec2,
                  (char *) &t_avg2,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();

        if (myrank == 0)
        {
            t_avg = t_avg / nranks;
            t_avg1 = t_avg1 / nranks;
            t_avg2 = t_avg2 / nranks;
            printf("%20d %20.2f %20.2f %20.2f", msgsize, t_avg, t_avg1, t_avg2);
            fflush(stdout);
        }

        t_avg = 0;
        t_avg1 = 0, t_avg2 = 0;
        target_index = 0;

        barrier();

        /***********************
         * start timer          *
         ***********************/

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec,
                  (char *) &t_avg,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        barrier();
        allreduce(-1,
                  (char *) &t_usec1,
                  (char *) &t_avg1,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count || ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec2 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        /***********************
         * stop timer          *
         ***********************/

        allreduce(-1,
                  (char *) &t_usec2,
                  (char *) &t_avg2,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();

        if (myrank == 0)
        {
            t_avg = t_avg / nranks;
            t_avg1 = t_avg1 / nranks;
            t_avg2 = t_avg2 / nranks;
            printf("%20.2f %20.2f %20.2f \n", t_avg, t_avg1, t_avg2);
            fflush(stdout);
        }

    }
}
Ejemplo n.º 9
0
/*
 * Issues ITERATIONS_LOCAL DCMF_Put operations of msgsize bytes each, one at
 * a time, waiting for every put to finish before starting the next.
 *
 * requests     array of at least ITERATIONS_LOCAL request objects; entry i
 *              backs the i-th put
 * target_rank  rank passed to DCMF_Put as the destination
 * src_region   index into memregion[] used as the source memory region
 * dst_region   index into memregion[] used as the destination memory region
 * msgsize      bytes transferred per put; put i uses offset i * msgsize on
 *              both the source and the destination side
 */
static void memcpyvsput_put_batch(DCMF_Request_t *requests,
                                  unsigned int target_rank,
                                  unsigned int src_region,
                                  unsigned int dst_region,
                                  unsigned int msgsize)
{
    DCMF_Callback_t put_done, put_ack;
    int done_count, ack_count;
    unsigned int i;

    /* Both callbacks share the `done` handler and differ only in the counter
     * they are given (presumably `done` decrements it -- defined elsewhere). */
    put_done.function = done;
    put_done.clientdata = (void *) &done_count;
    put_ack.function = done;
    put_ack.clientdata = (void *) &ack_count;

    for (i = 0; i < ITERATIONS_LOCAL; i++)
    {
        done_count = 1;
        ack_count = 1;
        DCMF_Put(&put_reg,
                 &requests[i],
                 put_done,
                 DCMF_SEQUENTIAL_CONSISTENCY,
                 target_rank,
                 msgsize,
                 memregion[src_region],
                 memregion[dst_region],
                 i * msgsize,
                 i * msgsize,
                 put_ack);

        /* Drive the messager until both counters have dropped to zero, so no
         * callback is outstanding when the counters go out of scope. */
        while (done_count > 0 || ack_count > 0)
            DCMF_Messager_advance();
    }
}

/*
 * Performs ITERATIONS_LOCAL memcpy calls of msgsize bytes inside `window`:
 * copy i reads from offset i * msgsize and writes to offset
 * ITERATIONS_LOCAL * MAX_MSG_SIZE_LOCAL + i * msgsize.
 */
static void memcpyvsput_memcpy_batch(unsigned int msgsize)
{
    unsigned int i;

    for (i = 0; i < ITERATIONS_LOCAL; i++)
    {
        memcpy(window + ITERATIONS_LOCAL * MAX_MSG_SIZE_LOCAL + i * msgsize,
               window + i * msgsize,
               msgsize);
    }
}

/*
 * Benchmark comparing DCMF_Put against memcpy for message sizes
 * 1, 2, 4, ... up to MAX_MSG_SIZE_LOCAL.
 *
 * Rank 0 takes five timings per message size and prints them as one row:
 *   1. puts to rank 2                  (column "DCMF_Put_Internode")
 *   2. puts to rank 1                  (column "DCMF_Put_Intranode")
 *   3. puts to rank 1 after a barrier  (column "DCMF_Put_Intranode (Busy DMA)")
 *   4. memcpy inside `window`          (column "Memcpy")
 *   5. memcpy after a barrier          (column "Memcpy (Busy DMA)")
 * Each value is the elapsed DCMF_Timebase() ticks divided by clockMHz and by
 * ITERATIONS_LOCAL.
 *
 * Every other rank, per message size, waits on the two barriers that precede
 * timings 3 and 5 and after each one runs a batch of puts into rank 0's
 * memory region; that is the traffic the "Busy DMA" columns refer to.
 *
 * The puts address ranks 1 and 2 literally, so at least three ranks are
 * needed. Whether those ranks are actually on the same or a different node
 * depends on the job mapping, which is not visible here.
 */
void memcpyvsput()
{
    DCMF_Request_t put_req[ITERATIONS_LOCAL];
    unsigned int msgsize;

    if (myrank == 0)
    {
        printf("%20s %20s %20s %30s %20s %20s \n",
               "Msg Size",
               "DCMF_Put_Internode",
               "DCMF_Put_Intranode",
               "DCMF_Put_Intranode (Busy DMA)",
               "Memcpy",
               "Memcpy (Busy DMA)");
        fflush(stdout);
    }

    barrier();

    if (myrank == 0)
    {
        for (msgsize = 1; msgsize <= MAX_MSG_SIZE_LOCAL; msgsize *= 2)
        {
            /* Timing 1: puts from rank 0 into rank 2. */
            t_start = DCMF_Timebase();
            memcpyvsput_put_batch(put_req, 2, 0, 2, msgsize);
            t_stop = DCMF_Timebase();
            t_usec = ((t_stop - t_start) / clockMHz) / ITERATIONS_LOCAL;

            printf("%20u %20.2f ", msgsize, t_usec);
            fflush(stdout);

            /* Timing 2: puts from rank 0 into rank 1. */
            t_start = DCMF_Timebase();
            memcpyvsput_put_batch(put_req, 1, 0, 1, msgsize);
            t_stop = DCMF_Timebase();
            t_usec = ((t_stop - t_start) / clockMHz) / ITERATIONS_LOCAL;

            printf("%20.2f ", t_usec);
            fflush(stdout);

            /* Timing 3: same puts as timing 2, but started right after the
             * barrier that also releases the other ranks' puts into rank 0. */
            barrier();

            t_start = DCMF_Timebase();
            memcpyvsput_put_batch(put_req, 1, 0, 1, msgsize);
            t_stop = DCMF_Timebase();
            t_usec = ((t_stop - t_start) / clockMHz) / ITERATIONS_LOCAL;

            printf("%28.2f ", t_usec);
            fflush(stdout);

            /* Timing 4: local memcpy. */
            t_start = DCMF_Timebase();
            memcpyvsput_memcpy_batch(msgsize);
            t_stop = DCMF_Timebase();
            t_usec = ((t_stop - t_start) / clockMHz) / ITERATIONS_LOCAL;

            printf("%20.2f ", t_usec);
            fflush(stdout);

            /* Timing 5: local memcpy started right after the second barrier,
             * which again releases the other ranks' puts into rank 0. */
            barrier();

            t_start = DCMF_Timebase();
            memcpyvsput_memcpy_batch(msgsize);
            t_stop = DCMF_Timebase();
            t_usec = ((t_stop - t_start) / clockMHz) / ITERATIONS_LOCAL;

            printf("%20.2f \n", t_usec);
            fflush(stdout);
        }
    }
    else
    {
        for (msgsize = 1; msgsize <= MAX_MSG_SIZE_LOCAL; msgsize *= 2)
        {
            /* Pairs with rank 0's barrier before timing 3. */
            barrier();
            memcpyvsput_put_batch(put_req, 0, myrank, 0, msgsize);

            /* Pairs with rank 0's barrier before timing 5. */
            barrier();
            memcpyvsput_put_batch(put_req, 0, myrank, 0, msgsize);
        }
    }

    barrier();
}
Ejemplo n.º 10
0
/*
 * Entry point of the multi-threaded benchmark.
 *
 * Initialises the DCMF messager with thread level DCMF_THREAD_MULTIPLE, sets
 * up the barrier, control, memory-region, put/get and send helpers, and
 * allocates the global source/target buffers. The job must consist of
 * exactly THREAD_NUM + 1 ranks.
 *
 * Rank 0 starts THREAD_NUM threads running mrate_test (each receives its
 * index as the thread argument) and joins them. Every other rank adds
 * LOCAL_ITERATIONS to snd_rcv_active and advances the messager until that
 * counter is no longer positive.
 *
 * Returns 0 on success, -1 if the rank count is wrong, a buffer cannot be
 * allocated, or a worker thread cannot be created.
 */
int main(int argc, char *argv[])
{
    /* Zero-initialise so that any member other than thread_level is not
     * indeterminate when the structure is handed to DCMF_Messager_configure
     * (NOTE(review): confirm that zero is the intended default there). */
    DCMF_Configure_t config = { 0 };
    long i;

    (void) argc;
    (void) argv;

    config.thread_level = DCMF_THREAD_MULTIPLE;

    DCMF_Messager_initialize();

    DCMF_Messager_configure(&config, &config);

    init();

    if (nranks != (THREAD_NUM + 1))
    {
        printf("This test requires only %d processes \n", (THREAD_NUM + 1));
        fflush(stdout);
        return -1;
    }

    barrier_init(DCMF_DEFAULT_GLOBALBARRIER_PROTOCOL);

    control_init(DCMF_DEFAULT_CONTROL_PROTOCOL, DCMF_DEFAULT_NETWORK);

    memregion_init(LOCAL_MAX_BUF_SIZE * THREAD_NUM);

    get_init(DCMF_DEFAULT_PUT_PROTOCOL, DCMF_TORUS_NETWORK);

    source = malloc(LOCAL_MAX_BUF_SIZE * THREAD_NUM);
    target = malloc(LOCAL_MAX_BUF_SIZE * THREAD_NUM);
    if (source == NULL || target == NULL)
    {
        printf("Unable to allocate source/target buffers \n");
        fflush(stdout);
        return -1;
    }

    send_init(DCMF_DEFAULT_SEND_PROTOCOL, DCMF_TORUS_NETWORK);

    if (myrank == 0)
    {
        pthread_t threads[THREAD_NUM];

        pthread_barrier_init(&ptbarrier, NULL, THREAD_NUM);
        pthread_barrier_init(&ptbarrier1, NULL, THREAD_NUM);

        for (i = 0; i < THREAD_NUM; i++)
        {
            /* The loop index travels to the thread inside the pointer value. */
            if (pthread_create(&threads[i], NULL, mrate_test, (void *) i) != 0)
            {
                /* Joining a thread that was never created is undefined, so
                 * give up instead of continuing with a partial set. */
                printf("Unable to create thread %ld \n", i);
                fflush(stdout);
                return -1;
            }
        }

        for (i = 0; i < THREAD_NUM; i++)
        {
            /* The thread result is not used. Passing NULL avoids writing a
             * pointer-sized value into an int, as the original code did. */
            pthread_join(threads[i], NULL);
        }
    }
    else
    {
        snd_rcv_active += LOCAL_ITERATIONS;
        while (snd_rcv_active > 0)
            DCMF_Messager_advance();
    }

    barrier();

    DCMF_Messager_finalize();

    if (myrank == 0)
    {
        printf("Benchmark Complete \n");
        fflush(stdout);
    }

    return (0);
}