Example #1
0
/**
 * \brief Global fence operation.
 *
 * Blocks until all active messages between the local node and all remote
 * nodes have completed and acknowledged by the remote node.
 *
 * \see ARMCIX_Fence
 * \see ARMCIX_DCMF_ReceiveFenceRequest
 * \see ARMCIX_DCMF_ReceiveFenceAck
 */
void ARMCIX_AllFence ()
{
    DCMF_CriticalSection_enter (0);

    unsigned size = DCMF_Messager_size ();
    unsigned peer;

    volatile unsigned active = 0;
    DCQuad quad;
    DCMF_Callback_t * cb = (DCMF_Callback_t *) &quad;
    cb->function   = ARMCIX_DCMF_cb_decrement;
    cb->clientdata = (void *) &active;

    DCMF_Callback_t cb_null = { NULL, NULL };
    DCMF_Callback_t cb_done = { (void (*)(void *, DCMF_Error_t *))ARMCIX_DCMF_request_free, NULL };
    for (peer = 0; peer < size; peer++)
    {
        ARMCIX_DCMF_Request_t * new_request = ARMCIX_DCMF_request_allocate (cb_null);
        cb_done.clientdata = new_request;

        active++;
        DCMF_Send ( &__fence_rts_protocol,
                    &(new_request->request),
                    cb_done,
                    DCMF_SEQUENTIAL_CONSISTENCY,
                    peer,
                    0,
                    NULL,
                    (DCQuad *) &quad,
                    1);

        while (active) DCMF_Messager_advance ();
    }

    DCMF_CriticalSection_exit (0);
}
/**
 * DCMF allreduce correctness test.
 *
 * Registers two barrier protocols (GI and lockbox) and two allreduce
 * protocols (tree and torus binomial), builds a geometry over all ranks,
 * and then, for message sizes doubling from sizeof(int) up to (but not
 * including) MAX_MSG_SIZE, runs a SUM allreduce on the tree protocol
 * followed by a PROD allreduce on the torus binomial protocol, validating
 * every reduced element.
 *
 * \return 0 on success.  The process exits with -1 if a buffer cannot be
 *         allocated or a reduced value does not match the expected one;
 *         failed DCMF calls trip an assert.
 */
int main()
{

    int i, rank, nranks, msgsize, status, expected;
    long bufsize;
    int *src_buffer;
    int *trg_buffer;
    unsigned *ranks;
    DCMF_Result dcmf_result;
    DCMF_CollectiveProtocol_t barrier_protocol, lbarrier_protocol;
    DCMF_CollectiveProtocol_t allreduce_protocol, allreduce_notree_protocol;
    DCMF_Barrier_Configuration_t barrier_conf;
    DCMF_Allreduce_Configuration_t allreduce_conf;
    DCMF_CollectiveRequest_t crequest, crequest1, crequest2;
    DCMF_Callback_t done_callback;
    /* Count of allreduce operations in flight; handed to the completion
     * callback as client data and polled below, hence volatile. */
    volatile unsigned allreduce_active = 0;

    DCMF_Messager_initialize();

    dcmf_result = DCMF_Collective_initialize();
    assert(dcmf_result == DCMF_SUCCESS);

    rank = DCMF_Messager_rank();
    nranks = DCMF_Messager_size();

    /* Identity rank list describing the geometry (all ranks, in order). */
    ranks = (unsigned *) malloc(nranks * sizeof *ranks);
    if (ranks == NULL)
    {
        printf("[%d] Unable to allocate rank list \n", rank);
        fflush(stdout);
        exit(-1);
    }
    for(i=0; i<nranks; i++) ranks[i] = i;

    bufsize = MAX_MSG_SIZE;
    src_buffer = (int *) malloc(bufsize);
    trg_buffer = (int *) malloc(bufsize);
    if (src_buffer == NULL || trg_buffer == NULL)
    {
        printf("[%d] Unable to allocate message buffers \n", rank);
        fflush(stdout);
        exit(-1);
    }

    barrier_conf.protocol = DCMF_GI_BARRIER_PROTOCOL;
    barrier_conf.cb_geometry = getGeometry;
    dcmf_result = DCMF_Barrier_register(&barrier_protocol, &barrier_conf);
    assert(dcmf_result == DCMF_SUCCESS);

    barrier_conf.protocol = DCMF_LOCKBOX_BARRIER_PROTOCOL;
    barrier_conf.cb_geometry = getGeometry;
    dcmf_result = DCMF_Barrier_register(&lbarrier_protocol, &barrier_conf);
    assert(dcmf_result == DCMF_SUCCESS);

    DCMF_CollectiveProtocol_t  *barrier_ptr, *lbarrier_ptr;
    barrier_ptr = &barrier_protocol;
    lbarrier_ptr  = &lbarrier_protocol;
    dcmf_result = DCMF_Geometry_initialize(&geometry,
                                           0,
                                           ranks,
                                           nranks,
                                           &barrier_ptr,
                                           1,
                                           &lbarrier_ptr,
                                           1,
                                           &crequest,
                                           0,
                                           1);
    assert(dcmf_result == DCMF_SUCCESS);

    allreduce_conf.protocol = DCMF_TREE_ALLREDUCE_PROTOCOL;
    allreduce_conf.cb_geometry = getGeometry;
    allreduce_conf.reuse_storage = 1;
    dcmf_result = DCMF_Allreduce_register(&allreduce_protocol, &allreduce_conf);
    assert(dcmf_result == DCMF_SUCCESS);

    allreduce_conf.protocol = DCMF_TORUS_BINOMIAL_ALLREDUCE_PROTOCOL;
    allreduce_conf.cb_geometry = getGeometry;
    allreduce_conf.reuse_storage = 1;
    dcmf_result = DCMF_Allreduce_register(&allreduce_notree_protocol, &allreduce_conf);
    assert(dcmf_result == DCMF_SUCCESS);

    /* Both protocols must report 1 for this geometry before they are used. */
    status = DCMF_Geometry_analyze(&geometry, &allreduce_protocol);
    assert(status == 1);

    status = DCMF_Geometry_analyze(&geometry, &allreduce_notree_protocol);
    assert(status == 1);

    done_callback.function = done;
    done_callback.clientdata = (void *) &allreduce_active;

    if (rank == 0)
    {
        printf("DCMF_Allreduce Test\n");
        fflush(stdout);
    }

    for (msgsize = sizeof(int); msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {
        /*initializing buffer*/
        for (i = 0; i < bufsize/sizeof(int); i++)
        {
            src_buffer[i] = rank;
            trg_buffer[i] = 0;
        }

        allreduce_active += 1;

        /*sum reduce operation*/
        dcmf_result = DCMF_Allreduce(&allreduce_protocol,
                                     &crequest1,
                                     done_callback,
                                     DCMF_SEQUENTIAL_CONSISTENCY,
                                     &geometry,
                                     (char *) src_buffer,
                                     (char *) trg_buffer,
                                     msgsize/sizeof(int),
                                     DCMF_SIGNED_INT,
                                     DCMF_SUM);
        assert(dcmf_result == DCMF_SUCCESS);

        while(allreduce_active > 0) DCMF_Messager_advance();

        /* Every rank contributed its own rank id: 0 + 1 + ... + (nranks-1). */
        expected = (nranks-1)*(nranks)/2;
        for (i = 0; i < msgsize/sizeof(int); i++)
        {
            if(trg_buffer[i] - expected != 0)
            {
                printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n",
                       rank, expected, trg_buffer[i], i);
                fflush(stdout);
                exit(-1);
            }
        }

        printf("[%d] %d message sum allreduce successful \n", rank, msgsize);
        fflush(stdout);

        for (i = 0; i < bufsize/sizeof(int); i++)
        {
            src_buffer[i] = 1;
            trg_buffer[i] = 0;
        }

        allreduce_active += 1;

        /*product reduce operation*/
        dcmf_result = DCMF_Allreduce(&allreduce_notree_protocol,
                                     &crequest2,
                                     done_callback,
                                     DCMF_SEQUENTIAL_CONSISTENCY,
                                     &geometry,
                                     (char *) src_buffer,
                                     (char *) trg_buffer,
                                     msgsize/sizeof(int),
                                     DCMF_SIGNED_INT,
                                     DCMF_PROD);
        assert(dcmf_result == DCMF_SUCCESS);

        while(allreduce_active > 0) DCMF_Messager_advance();

        /* Every rank contributed 1, so the product is 1. */
        expected = 1;
        for (i = 0; i < msgsize/sizeof(int); i++)
        {
            if(trg_buffer[i] - expected != 0)
            {
                printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n",
                       rank, expected, trg_buffer[i], i);
                fflush(stdout);
                exit(-1);
            }
        }
        printf("[%d] %d message product allreduce successful\n", rank, msgsize);
        fflush(stdout);
    }

    free(src_buffer);
    free(trg_buffer);

    DCMF_Messager_finalize();

    /* Released only after finalize, in case the geometry still refers to
     * the rank list while the messager is alive. */
    free(ranks);

    return 0;
}
Example #3
0
/**
 * Initializes the A1D device layer on top of MPI and DCMF.
 *
 * Steps, in order:
 *   1. verify that MPI is initialized with MPI_THREAD_MULTIPLE, duplicate
 *      MPI_COMM_WORLD into A1D_COMM_WORLD, and record rank and size;
 *   2. reconfigure the DCMF messager;
 *   3. create a local memregion, allgather it into A1D_Memregion_list and
 *      query every entry to fill A1D_Baseptr_list;
 *   4. (optional, FLUSH_IMPLEMENTED) allocate the flush lists;
 *   5. set up the null callback A1D_Nocallback.
 *
 * Every failure is reported through assert(); there is no error return.
 *
 * \return always 0.
 */
int A1D_Initialize()
{
    int mpi_initialized, mpi_provided;
    int mpi_status;
    int i;
    size_t bytes_in, bytes_out;
    DCMF_Result dcmf_result;
    DCMF_Configure_t dcmf_config;
    DCMF_Memregion_t local_memregion;

    /***************************************************
     *
     * configure MPI
     *
     ***************************************************/

    /* MPI has to be initialized for this implementation to work */
    MPI_Initialized(&mpi_initialized);
    assert(mpi_initialized==1);

    /* MPI has to be thread-safe so that DCMF doesn't explode */
    MPI_Query_thread(&mpi_provided);
    assert(mpi_provided==MPI_THREAD_MULTIPLE);

    /* have to use our own communicator for collectives to be proper */
    mpi_status = MPI_Comm_dup(MPI_COMM_WORLD,&A1D_COMM_WORLD);
    assert(mpi_status==0);

    /* get my MPI rank */
    mpi_status = MPI_Comm_rank(A1D_COMM_WORLD,&myrank);
    assert(mpi_status==0);

    /* get MPI world size */
    mpi_status = MPI_Comm_size(A1D_COMM_WORLD,&mpi_size);
    assert(mpi_status==0);

    /* make sure MPI and DCMF agree */
    assert(myrank==DCMF_Messager_rank());
    assert(mpi_size==DCMF_Messager_size());

    /* barrier before DCMF_Messager_configure to make sure MPI is ready everywhere */
    mpi_status = MPI_Barrier(A1D_COMM_WORLD);
    assert(mpi_status==0);

    /***************************************************
     *
     * configure DCMF
     *
     ***************************************************/

    /* to be safe, but perhaps not necessary */
    dcmf_config.thread_level = DCMF_THREAD_MULTIPLE;
#ifdef ACCUMULATE_IMPLEMENTED
    /* interrupts required for accumulate only, Put/Get use DMA
     * if accumulate not used, MPI will query environment for DCMF_INTERRUPTS */
    dcmf_config.interrupts = DCMF_INTERRUPTS_ON;
#endif
    /* NOTE(review): when ACCUMULATE_IMPLEMENTED is not defined,
     * dcmf_config.interrupts is handed to DCMF_Messager_configure without
     * having been assigned in this function -- confirm that is intended. */

    /* reconfigure DCMF; the same structure is passed as both arguments */
    DCMF_CriticalSection_enter(0);
    dcmf_result = DCMF_Messager_configure(&dcmf_config, &dcmf_config);
    assert(dcmf_result==DCMF_SUCCESS);
    DCMF_CriticalSection_exit(0);

    /* barrier after DCMF_Messager_configure to make sure everyone has the new DCMF config */
    mpi_status = MPI_Barrier(A1D_COMM_WORLD);
    assert(mpi_status==0);

    /***************************************************
     *
     * setup DCMF memregions
     *
     ***************************************************/

    /* allocate memregion list */
    A1D_Memregion_list = malloc( mpi_size * sizeof(DCMF_Memregion_t) );
    assert(A1D_Memregion_list != NULL);

    /* allocate base pointer list */
    A1D_Baseptr_list = malloc( mpi_size * sizeof(void*) );
    assert(A1D_Baseptr_list != NULL);

    /* create memregions; bytes_in of (size_t)-1 requests the largest possible span */
    bytes_in = -1;
    DCMF_CriticalSection_enter(0);
    dcmf_result = DCMF_Memregion_create(&local_memregion,&bytes_out,bytes_in,NULL,0);
    assert(dcmf_result==DCMF_SUCCESS);
    DCMF_CriticalSection_exit(0);

    /* exchange memregions because we don't use symmetry heap */
    mpi_status = MPI_Allgather(&local_memregion,sizeof(DCMF_Memregion_t),MPI_BYTE,
                               A1D_Memregion_list,sizeof(DCMF_Memregion_t),MPI_BYTE,
                               A1D_COMM_WORLD);
    assert(mpi_status==0);

    /* destroy temporary local memregion */
    DCMF_CriticalSection_enter(0);
    dcmf_result = DCMF_Memregion_destroy(&local_memregion);
    assert(dcmf_result==DCMF_SUCCESS);
    DCMF_CriticalSection_exit(0);

    /* check for valid memregions and record each rank's base pointer */
    DCMF_CriticalSection_enter(0);
    for (i = 0; i < mpi_size; i++)
    {
        dcmf_result = DCMF_Memregion_query(&A1D_Memregion_list[i],
                                           &bytes_out,
                                           &A1D_Baseptr_list[i]);
        assert(dcmf_result==DCMF_SUCCESS);
    }
    DCMF_CriticalSection_exit(0);

#ifdef FLUSH_IMPLEMENTED
    /***************************************************
     *
     * setup flush list(s)
     *
     ***************************************************/

    /* allocate Put list */
    A1D_Put_flush_list = malloc( mpi_size * sizeof(int) );
    assert(A1D_Put_flush_list != NULL);

  #ifdef ACCUMULATE_IMPLEMENTED
    /* allocate Acc list */
    A1D_Send_flush_list = malloc( mpi_size * sizeof(int) );
    assert(A1D_Send_flush_list != NULL);
  #endif

#endif

    /***************************************************
     *
     * define null callback
     *
     ***************************************************/

    A1D_Nocallback.function = NULL;
    A1D_Nocallback.clientdata = NULL;

    return(0);
}
Example #4
0
/**
 * DCMF global allreduce correctness test.
 *
 * Registers the default global-allreduce protocol and then, for message
 * sizes doubling from sizeof(int) up to (but not including) MAX_MSG_SIZE,
 * runs an in-place SUM reduction followed by an in-place PROD reduction,
 * validating every reduced element.
 *
 * \return 0 on success.  The process exits with -1 if the buffer cannot be
 *         allocated, a DCMF call reports an error, or a reduced value does
 *         not match the expected one.
 */
int main()
{

    int i, rank, nranks, msgsize, status, expected;
    long bufsize;
    int *buffer;
    DCMF_Protocol_t ga_protocol;
    DCMF_GlobalAllreduce_Configuration_t ga_conf;
    DCMF_Request_t request;
    DCMF_Callback_t done_callback;
    /* Count of reductions in flight; handed to the completion callback as
     * client data and polled below, hence volatile. */
    volatile unsigned ga_active = 0;

    DCMF_Messager_initialize();

    rank = DCMF_Messager_rank();
    nranks = DCMF_Messager_size();

    bufsize = MAX_MSG_SIZE;
    buffer = (int *) malloc(bufsize);
    if (buffer == NULL)
    {
        printf("[%d] Unable to allocate message buffer \n", rank);
        fflush(stdout);
        exit(-1);
    }

    ga_conf.protocol = DCMF_DEFAULT_GLOBALALLREDUCE_PROTOCOL;
    status = DCMF_GlobalAllreduce_register(&ga_protocol,
                                           &ga_conf);
    if(status != DCMF_SUCCESS)
    {
       printf("DCMF_GlobalAllreduce_register returned with error %d \n",
                 status);
       exit(-1);
    }

    done_callback.function = done;
    done_callback.clientdata = (void *) &ga_active;

    if (rank == 0)
    {
        printf("DCMF_Allreduce Test\n");
        fflush(stdout);
    }

    for (msgsize = sizeof(int); msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {
        /*initializing buffer*/
        for (i = 0; i < bufsize/sizeof(int); i++)
        {
            buffer[i] = rank;
        }

        ga_active += 1;

        /*sum reduce operation*/
        status = DCMF_GlobalAllreduce(&ga_protocol,
                                      &request,
                                      done_callback,
                                      DCMF_SEQUENTIAL_CONSISTENCY,
                                      -1,
                                      (char *) buffer,
                                      (char *) buffer,
                                      msgsize/sizeof(int),
                                      DCMF_SIGNED_INT,
                                      DCMF_SUM);
        /* Without this check a failed post would leave ga_active set and
         * the wait loop below would never terminate. */
        if(status != DCMF_SUCCESS)
        {
            printf("DCMF_GlobalAllreduce returned with error %d \n",
                   status);
            fflush(stdout);
            exit(-1);
        }

        while(ga_active > 0) DCMF_Messager_advance();

        /* Every rank contributed its own rank id: 0 + 1 + ... + (nranks-1). */
        expected = (nranks-1)*(nranks)/2;
        for (i = 0; i < msgsize/sizeof(int); i++)
        {
            if(buffer[i] - expected != 0)
            {
                printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n",
                       rank, expected, buffer[i], i);
                fflush(stdout);
                exit(-1);
            }
        }

        printf("[%d] %d message sum reduce successful \n", rank, msgsize);
        fflush(stdout);

        for (i = 0; i < bufsize/sizeof(int); i++)
        {
            buffer[i] = 1;
        }

        ga_active += 1;

        /*product reduce operation*/
        status = DCMF_GlobalAllreduce(&ga_protocol,
                                      &request,
                                      done_callback,
                                      DCMF_SEQUENTIAL_CONSISTENCY,
                                      -1,
                                      (char *) buffer,
                                      (char *) buffer,
                                      msgsize/sizeof(int),
                                      DCMF_SIGNED_INT,
                                      DCMF_PROD);
        if(status != DCMF_SUCCESS)
        {
            printf("DCMF_GlobalAllreduce returned with error %d \n",
                   status);
            fflush(stdout);
            exit(-1);
        }

        while(ga_active > 0) DCMF_Messager_advance();

        /* Every rank contributed 1, so the product is 1. */
        expected = 1;
        for (i = 0; i < msgsize/sizeof(int); i++)
        {
            if(buffer[i] - expected != 0)
            {
                printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n",
                       rank, expected, buffer[i], i);
                fflush(stdout);
                exit(-1);
            }
        }

        printf("[%d] %d message product reduce successful\n", rank, msgsize);
        fflush(stdout);

    }

    free(buffer);
    DCMF_Messager_finalize();

    return 0;
}
Example #5
0
/**
 * \brief Set up the per-peer connection table and exchange memory regions.
 *
 * Allocates and zeroes one ARMCIX_DCMF_Connection_t per rank, creates a
 * local memory region in each entry, registers a send protocol whose
 * receive callbacks are ARMCIX_DCMF_RecvMemregion1/2, and then sends the
 * matching local memory region to every rank (starting with the local
 * rank and wrapping around).  Returns once every send has completed and
 * __memregions_to_receive has dropped to zero.  The whole operation runs
 * inside DCMF critical section 0.
 *
 * Aborts the process if the connection table cannot be allocated.
 */
void ARMCIX_DCMF_Connection_initialize ()
{
  DCMF_CriticalSection_enter(0);

  __global_connection.peer = (unsigned) -1;

  unsigned rank = DCMF_Messager_rank ();
  unsigned size = DCMF_Messager_size ();

  // posix_memalign reports failure through its return value and leaves the
  // output pointer unspecified, so it must be checked before the table is
  // touched.  The function returns void, hence abort().
  if (posix_memalign ((void **)&__connection, 16, sizeof(ARMCIX_DCMF_Connection_t) * size) != 0)
  {
    abort ();
  }
  bzero ((void *)__connection, sizeof(ARMCIX_DCMF_Connection_t) * size);

  // Output argument of DCMF_Memregion_create; its value is not used here.
  size_t bytes = (size_t) -1;

  unsigned i;
  for (i = 0; i < size; i++)
  {
    __connection[i].peer = i;
#warning fix memregion setup to handle non-global address space pinning.
    // NOTE(review): the result of DCMF_Memregion_create is not checked.
    //DCMF_Result result =
      DCMF_Memregion_create (&__connection[i].local_mem_region,
                             &bytes, (size_t) -1, NULL, 0);
  }

  // Register a send protocol to exchange memory regions
  DCMF_Protocol_t send_protocol;
  DCMF_Send_Configuration_t send_configuration = {
    DCMF_DEFAULT_SEND_PROTOCOL,
    DCMF_DEFAULT_NETWORK,
    ARMCIX_DCMF_RecvMemregion1,
    __connection,
    ARMCIX_DCMF_RecvMemregion2,
    __connection
  };
  DCMF_Send_register (&send_protocol, &send_configuration);

  // A single request object is reused: each send is waited on before the
  // next one is posted.
  DCMF_Request_t request;
  volatile unsigned active;
  DCMF_Callback_t cb_done = { ARMCIX_DCMF_cb_decrement, (void *) &active };

  // Exchange the memory regions
  __memregions_to_receive = size;
  for (i = 0; i < size; i++)
  {
    unsigned peer = (rank+i)%size;
    active = 1;
    DCMF_Send (&send_protocol,
               &request,
               cb_done,
               DCMF_SEQUENTIAL_CONSISTENCY,
               peer,
               sizeof(DCMF_Memregion_t),
               (char *) &__connection[peer].local_mem_region,
               (DCQuad *) NULL,
               0);
    while (active) DCMF_Messager_advance();
  }

  // Wait until a memory region has arrived from every rank.
  while (__memregions_to_receive) DCMF_Messager_advance();

  DCMF_CriticalSection_exit(0);
}