/*\ client sends strided data + request to server
\*/
int armci_send_req_msg_strided(int proc, request_header_t *msginfo,char *ptr,
                               int strides, int stride_arr[], int count[])
{
    int server;
    int clus_id = armci_clus_id(proc);
    int bytes;

    /* Abhinav Vishnu */
    server = armci_clus_info[clus_id].master;

    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided: proc=%d server=%d "
                     "bytes=%d (op=%d)\n", proc, server, msginfo->datalen,
                     msginfo->operation);


    /* we write header + descriptor of strided data  */
    bytes = sizeof(request_header_t) + msginfo->dscrlen;
    armci_send_req_msg(proc, msginfo, bytes);

    {
        /* for larger blocks write directly thus avoiding memcopy */
        armci_mpi_strided_c2s(SEND, ptr, strides, stride_arr, count, server,
                              ARMCI_COMM_WORLD);
    }


    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided(): send msg to "
                     "server(%d), to fwd to client %d\n", server, proc);

    return 0;
}
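
/*
 * Illustrative sketch, not part of the original source: ARMCI strided
 * descriptors conventionally use
 *   count[0]          = bytes in each contiguous segment,
 *   count[1..levels]  = number of segments along each higher dimension,
 *   stride_arr[i]     = byte stride at level i.
 * The helper below is hypothetical and only shows how a caller might
 * describe a 2-D patch; filling in the request_header_t fields is omitted.
 */
static void example_send_2d_patch(int proc, request_header_t *msginfo,
                                  char *buf, int row_bytes, int nrows,
                                  int row_pitch)
{
    int stride_arr[1], count[2];

    count[0]      = row_bytes;  /* contiguous bytes per row             */
    count[1]      = nrows;      /* number of rows in the patch          */
    stride_arr[0] = row_pitch;  /* bytes between the starts of two rows */

    /* one stride level: each row is contiguous, rows themselves are strided */
    armci_send_req_msg_strided(proc, msginfo, buf, 1, stride_arr, count);
}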
/*\ client receives data from server
\*/
char *armci_ReadFromDirect (int proc, request_header_t *msginfo, int len)
{

    int server;
    int clus_id = armci_clus_id(proc);
    MPI_Status status;

    server = armci_clus_info[clus_id].master;

    armci_mpi2_debug(armci_me, "armci_ReadFromDirect: proc=%d, server=%d, "
                     "msginfo=%p, bytes=%d (op=%d)\n", proc, server, msginfo,
                     len, msginfo->operation);
    MPI_Check(
        MPI_Recv(msginfo + 1, len, MPI_BYTE, server, ARMCI_MPI_SERVER2CLIENT_TAG,
                 ARMCI_COMM_WORLD, &status)
    );


    armci_mpi2_debug(armci_me, "recv msg from server(%d), fwd by client %d\n",
                     server, proc);

    {
        int count;
        MPI_Get_count(&status, MPI_BYTE, &count);
        if (count != len)
        {
            armci_mpi2_debug(armci_me, "armci_ReadFromDirect: got %d bytes, "
                             "expected %d bytes\n", count, len);
            armci_die("armci_ReadFromDirect: MPI_Recv failed.", count);
        }
    }

    return (char *) (msginfo+1);
}
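
/*
 * Illustrative sketch, not part of the original source: a GET-style request
 * typically pairs armci_send_req_msg() (header + descriptor out) with
 * armci_ReadFromDirect() (payload back).  The reply lands just past the
 * request header, mirroring the (msginfo + 1) convention above; the helper
 * name and the assumption that msginfo is already filled in are hypothetical.
 */
static char *example_get_roundtrip(int proc, request_header_t *msginfo,
                                   int reply_bytes)
{
    /* 1. ship the request header plus its strided/vector descriptor */
    armci_send_req_msg(proc, msginfo,
                       sizeof(request_header_t) + msginfo->dscrlen);

    /* 2. block until the data server returns reply_bytes of payload */
    return armci_ReadFromDirect(proc, msginfo, reply_bytes);
}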
/* Create connections between clients and servers */
void armci_init_connections()
{
    armci_mpi2_debug(0, "armci_init_connections\n");
    _armci_buf_init();    /* CHECK: Is this correct ? */
    MPI_Check(MPI_Barrier(ARMCI_COMM_WORLD));
    /* Abhinav Vishnu */
    armci_create_server_MPIprocess();

    armci_mpi2_debug(0, "armci_init_connections completed\n");
}
/* Example #4 */
/*\ client receives strided data from server
\*/
void armci_ReadStridedFromDirect(int proc, request_header_t* msginfo,
                                 void *ptr, int strides, int stride_arr[],
                                 int count[])
{

    int server=armci_clus_id(proc);
    
    armci_mpi2_debug(armci_me, "armci_ReadStridedFromDirect: proc=%d "
                     "stride_levels=%d, server=%d bytes=%d (op=%d)\n",
                     proc, strides, server, msginfo->datalen,
                     msginfo->operation);

    
    if( !(server >= 0 && server < armci_nserver) )
       armci_die("armci_ReadStridedFromDirect: Invalid server.", 0);

#ifdef MPI_USER_DEF_DATATYPE
    if(strides > 0) 
    {
       armci_mpi_strided2(RECV, ptr, strides, stride_arr, count, server,
                          MPI_COMM_CLIENT2SERVER);
    }
    else
#endif
    {
       armci_mpi_strided(RECV, ptr, strides, stride_arr, count, server,
                         MPI_COMM_CLIENT2SERVER);
    }
}
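
/*
 * Illustrative sketch, not part of the original source: armci_mpi_strided2()
 * is not shown here, but MPI_USER_DEF_DATATYPE suggests the transfer is
 * described with an MPI derived datatype instead of segment-by-segment
 * messages.  For a single stride level that could look roughly like the
 * following; the helper name and parameters are hypothetical.
 */
static void example_recv_with_hvector(void *ptr, int seg_bytes, int nsegs,
                                      MPI_Aint seg_stride, int server,
                                      int tag, MPI_Comm comm)
{
    MPI_Datatype block;
    MPI_Status status;

    /* nsegs blocks of seg_bytes bytes, seg_stride bytes apart in memory */
    MPI_Type_create_hvector(nsegs, seg_bytes, seg_stride, MPI_BYTE, &block);
    MPI_Type_commit(&block);

    /* one receive pulls the whole strided patch directly into place */
    MPI_Recv(ptr, 1, block, server, tag, comm, &status);

    MPI_Type_free(&block);
}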
void armci_wait_for_server()
{
    armci_mpi2_debug(0, "armci_wait_for_server: wait for server to quit\n");
    if (armci_me == armci_master)
    {
        armci_serv_quit();
    }
}
static inline int MPI_Check (int status)
{
    if(status != MPI_SUCCESS)
    {
        armci_mpi2_debug(armci_me, "MPI Check failed.\n");
        armci_die("MPI_Check failed.", 0);
    }
    return status;
}
/*\ client sends request message to server
\*/
int armci_send_req_msg (int proc, void *buf, int bytes)
{
    int clus_id = armci_clus_id(proc);
    int server ;

    /* Abhinav Vishnu */
    server = armci_clus_info[clus_id].master;

    armci_mpi2_debug(armci_me, "armci_send_req_msg(): proc=%d, server=%d, "
                     "buf=%p, bytes=%d\n", proc, server, buf, bytes);

    MPI_Check(
        MPI_Send(buf, bytes, MPI_BYTE, server, ARMCI_MPI_CLIENT2SERVER_TAG,
                 ARMCI_COMM_WORLD)
    );
    armci_mpi2_debug(armci_me, "armci_send_req_msg(): send msg to server(%d), "
                     "to fwd to client %d\n", server, proc);

    return 0;
}
/* Example #8 */
/*\ client sends request message to server
\*/
int armci_send_req_msg (int proc, void *buf, int bytes)
{
  int server = armci_clus_id(proc);

  armci_mpi2_debug(armci_me, "armci_send_req_msg(): proc=%d, server=%d, "
                   "buf=%p, bytes=%d\n", proc, server, buf, bytes);
  
  if( !(server >= 0 && server < armci_nserver) )
     armci_die("armci_send_req_msg: Invalid server.", 0);

#ifdef MULTIPLE_BUFS
  /**
   * Sequentially ordered tags to ensure flow control at the server side.
   * For example, a put followed by a get from a client should be processed
   * in ORDER at the server side. Without this flow control, the server
   * might process the get request before the put (thus violating ARMCI's
   * ordering semantics).
   */
  ((request_header_t*)buf)->tag = _armci_mpi_tag[server];
  MPI_Check(
     MPI_Send(buf, bytes, MPI_BYTE, server, ARMCI_MPI_SPAWN_TAG,
              MPI_COMM_CLIENT2SERVER)
     );

  _armci_mpi_tag[server]++;
  if(_armci_mpi_tag[server] > ARMCI_MPI_SPAWN_TAG_END) 
     _armci_mpi_tag[server] = ARMCI_MPI_SPAWN_TAG_BEGIN;
  
#else
  MPI_Check(
     MPI_Send(buf, bytes, MPI_BYTE, server, ARMCI_MPI_SPAWN_TAG,
              MPI_COMM_CLIENT2SERVER)
     );
#endif
  armci_mpi2_debug(armci_me, "armci_send_req_msg(): send msg to server(%d), "
                   "to fwd to client %d\n", server, proc);

  return 0;
}
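
/*
 * Illustrative sketch, not part of the original source: the rotating tags
 * above only help if the data server consumes each client's requests in tag
 * order.  A much-simplified, hypothetical server loop might look like the
 * following (the real server pre-posts multiple buffers; buffer handling,
 * the communicator argument, and request dispatch are placeholders).
 */
static void example_server_drain_in_order(void *reqbuf, int maxbytes,
                                          int nclients, MPI_Comm client2server)
{
    int i, src;
    int *expected = (int*) malloc(nclients * sizeof(int));
    MPI_Status status;

    for (i = 0; i < nclients; i++)
        expected[i] = ARMCI_MPI_SPAWN_TAG_BEGIN;

    for (;;) {
        /* find out which client has a request pending */
        MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, client2server, &status);
        src = status.MPI_SOURCE;

        /* receive it on the tag this client must use next, so an earlier
           put cannot be overtaken by a later get from the same client */
        MPI_Recv(reqbuf, maxbytes, MPI_BYTE, src, expected[src],
                 client2server, &status);

        /* ... dispatch ((request_header_t*)reqbuf)->operation here ... */

        if (++expected[src] > ARMCI_MPI_SPAWN_TAG_END)
            expected[src] = ARMCI_MPI_SPAWN_TAG_BEGIN;
    }
}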
/* Example #9 */
/*\ client receives data from server
\*/
char *armci_ReadFromDirect (int proc, request_header_t *msginfo, int len)
{

    int server = armci_clus_id(proc);
    MPI_Status status;

    armci_mpi2_debug(armci_me, "armci_ReadFromDirect: proc=%d, server=%d, "
                     "msginfo=%p, bytes=%d (op=%d)\n", proc, server, msginfo,
                     len, msginfo->operation);
    
    if( !(server >= 0 && server < armci_nserver) )
       armci_die("armci_ReadFromDirect: Invalid server.", 0);
    
    MPI_Check(
       MPI_Recv(msginfo + 1, len, MPI_BYTE, server, ARMCI_MPI_SPAWN_TAG,
                MPI_COMM_CLIENT2SERVER, &status)
       );

    
    armci_mpi2_debug(armci_me, "recv msg from server(%d), fwd by client %d\n",
                     server, proc);

#if MPI_SPAWN_DEBUG
    {
       int count;
       MPI_Get_count(&status, MPI_BYTE, &count);
       if (count != len) 
       {
          armci_mpi2_debug(armci_me, "armci_ReadFromDirect: got %d bytes, "
                           "expected %d bytes\n", count, len);
          armci_die("armci_ReadFromDirect: MPI_Recv failed.", count);
       }
    }
#endif
    
    return (char *) (msginfo+1);
}
/* Example #10 */
/*\ client sends strided data + request to server
\*/
int armci_send_req_msg_strided(int proc, request_header_t *msginfo,char *ptr,
                               int strides, int stride_arr[], int count[])
{
    int server = armci_clus_id(proc);
    int bytes;

    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided: proc=%d server=%d "
                     "bytes=%d (op=%d)\n", proc, server, msginfo->datalen,
                     msginfo->operation);

    THREAD_LOCK(armci_user_threads.net_lock);

    /* we write header + descriptor of strided data  */
    bytes = sizeof(request_header_t) + msginfo->dscrlen;
    armci_send_req_msg(proc, msginfo, bytes);
    
#ifdef MPI_USER_DEF_DATATYPE
    if(strides>0) 
    {
       armci_mpi_strided2(SEND, ptr, strides, stride_arr, count, server,
                          MPI_COMM_CLIENT2SERVER);
    }
    else
#endif
    {
       /* for larger blocks write directly thus avoiding memcopy */
       armci_mpi_strided(SEND, ptr, strides, stride_arr, count, server,
                         MPI_COMM_CLIENT2SERVER);
    }
       
    THREAD_UNLOCK(armci_user_threads.net_lock);

    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided(): send msg to "
                     "server(%d), to fwd to client %d\n", server, proc);

    return 0;
}
/**
 * Create server processes. This is called in armci_start_server.
 * Must be called after armci_init_clusinfo().
 */
void armci_create_server_MPIprocess ()
{
    int rank, size, flag;

    MPI_Check(MPI_Initialized(&flag));
    if (flag == 0)
        armci_die("ARMCI error: MPI_Init must be called before PARMCI_Init()",0);

    MPI_Check(MPI_Comm_rank(ARMCI_COMM_WORLD, &rank));
    MPI_Check(MPI_Comm_size(ARMCI_COMM_WORLD, &size));

    armci_nserver = armci_nclus;

    /* make sure all processes sync here. CHECK: does it ensure global sync? */
    MPI_Check(MPI_Barrier(ARMCI_COMM_WORLD));

    armci_mpi2_debug(0, "armci_create_server_MPIprocess: Servers spawned!\n");
}
/*\ client receives strided data from server
\*/
void armci_ReadStridedFromDirect(int proc, request_header_t* msginfo,
                                 void *ptr, int strides, int stride_arr[],
                                 int count[])
{

    int server;
    int clus_id = armci_clus_id(proc);

    /* Abhinav Vishnu */
    server = armci_clus_info[clus_id].master;

    armci_mpi2_debug(armci_me, "armci_ReadStridedFromDirect: proc=%d "
                     "stride_levels=%d, server=%d bytes=%d (op=%d)\n",
                     proc, strides, server, msginfo->datalen,
                     msginfo->operation);

    {
        armci_mpi_strided_c2s(RECV, ptr, strides, stride_arr, count, server,
                              ARMCI_COMM_WORLD);
    }
}
/* Example #13 */
void armci_client_connect_to_servers()
{
    armci_mpi2_debug(0, "armci_client_connect_to_servers\n");   
}
/* Example #14 */
/* Create connections between clients and servers */
void armci_init_connections()
{
    armci_mpi2_debug(0, "armci_init_connections\n");
    _armci_buf_init();    /* CHECK: Is this correct ? */
}
/* Example #15 */
/**
 * Create server processes. This is called in armci_start_server.
 * Must be called after armci_init_clusinfo().
 */
void armci_create_server_MPIprocess ()
{
    int rank, size, flag, i;

    MPI_Initialized(&flag);
    if (flag == 0)
       armci_die("ARMCI error: MPI_Init must be called before PARMCI_Init()",0);
    
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* spawn one data server process (i.e. additional MPI proc) on each node */
    armci_mpi2_spawn();
    
    /**
     * ARMCI masters send the following info to their corresponding server,
     * as the server was not part of the initialization step in PARMCI_Init():
     *    1. cluster info ( i.e. armci_init_clusinfo()  )
     *    2. lock info    ( i.e. armci_allocate_locks() )
     */
    
    if(armci_me == armci_master) {
       int msg[3];
       long shm_info[3], shmoffset;
       int shmid;
       size_t shmsize;
       
       /**
        * 1. Cluster info
        */
       msg[0] = ARMCI_MPI_SPAWN_INIT_TAG + armci_clus_me; /* for validation */
       msg[1] = armci_me;
       msg[2] = armci_clus_info[armci_clus_me].nslave;
       MPI_Send(msg, 3, MPI_INT, armci_clus_me, ARMCI_MPI_SPAWN_INIT_TAG,
                MPI_COMM_CLIENT2SERVER);

       /* send the entire clus info to its data server */
       MPI_Send(armci_clus_info, armci_nclus*sizeof(armci_clus_t), MPI_BYTE,
                armci_clus_me, ARMCI_MPI_SPAWN_INIT_TAG,
                MPI_COMM_CLIENT2SERVER);
       
       /**
        * 2. lock info
        */
       armci_get_shmem_info((char*)_armci_int_mutexes, &shmid, &shmoffset,
                            &shmsize);
       shm_info[0] = (long) shmid;
       shm_info[1] = (long) shmoffset;
       shm_info[2] = (long) shmsize;
       
       MPI_Send(shm_info, 3, MPI_LONG, armci_clus_me, ARMCI_MPI_SPAWN_INIT_TAG,
                MPI_COMM_CLIENT2SERVER);       
    }
     

    /* initialize tags for flow control */
    _armci_mpi_tag = (int*) malloc(armci_nserver*sizeof(int));
    for(i=0; i<armci_nserver; i++)
       _armci_mpi_tag[i]=ARMCI_MPI_SPAWN_TAG_BEGIN;
    
    /* make sure all processes sync here. CHECK: does it ensure global sync? */
    MPI_Barrier(MPI_COMM_WORLD);
    
    armci_mpi2_debug(0, "armci_create_server_MPIprocess: Servers spawned!\n");
}
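
/*
 * Illustrative sketch, not part of the original source: the spawned data
 * server has to post matching receives for the three messages sent by its
 * ARMCI master above.  The communicator argument, the helper name, and how
 * the lock shmem segment is attached afterwards are assumptions; the real
 * server would retain clus_info and shm_info for the lifetime of the run.
 */
static void example_server_receive_init_info(MPI_Comm client2server,
                                             int my_clus_id)
{
    int msg[3], nbytes, master;
    long shm_info[3];
    armci_clus_t *clus_info;
    MPI_Status status;

    /* 1. cluster summary: validation tag, master rank, number of slaves */
    MPI_Recv(msg, 3, MPI_INT, MPI_ANY_SOURCE, ARMCI_MPI_SPAWN_INIT_TAG,
             client2server, &status);
    if (msg[0] != ARMCI_MPI_SPAWN_INIT_TAG + my_clus_id)
        armci_die("server init: cluster id mismatch", msg[0]);
    master = status.MPI_SOURCE;

    /* 2. the full cluster table; size it from the incoming message */
    MPI_Probe(master, ARMCI_MPI_SPAWN_INIT_TAG, client2server, &status);
    MPI_Get_count(&status, MPI_BYTE, &nbytes);
    clus_info = (armci_clus_t*) malloc(nbytes);
    MPI_Recv(clus_info, nbytes, MPI_BYTE, master, ARMCI_MPI_SPAWN_INIT_TAG,
             client2server, &status);

    /* 3. shmid/offset/size of the mutex region allocated by the master */
    MPI_Recv(shm_info, 3, MPI_LONG, master, ARMCI_MPI_SPAWN_INIT_TAG,
             client2server, &status);
    /* ... attach to the shmem segment described by shm_info here ... */
}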
/* Example #16 */
static void armci_mpi2_spawn() 
{

    int i;
    char server_program[100];
    char **command_arr=NULL, **hostname_arr=NULL, **nid_arr=NULL;
    int *size_arr=NULL;
    MPI_Info *info_arr;
    
    /* we need to start 1 data server process on each node. So a total of
       "armci_nclus" data servers */
    armci_nserver = armci_nclus;
    select_server_program(server_program, armci_nserver);
    
    armci_mpi2_debug(0, "armci_mpi2_init(): Spawning %d data server processes "
                     "running %s\n", armci_nserver, server_program);

    /* allocate necessary data structures */
    {
       command_arr  = (char**)    malloc(armci_nserver * sizeof(char*));
       size_arr     = (int*)      malloc(armci_nserver * sizeof(int));
       info_arr     = (MPI_Info*) malloc(armci_nserver * sizeof(MPI_Info));
       hostname_arr = (char**)    malloc(armci_nserver * sizeof(char*));
#ifdef SPAWN_CRAY_XT
       nid_arr      = (char**)    malloc(armci_nserver * sizeof(char*));
#endif

       if(command_arr==NULL || size_arr==NULL || info_arr==NULL ||
          hostname_arr==NULL)
       {
          armci_die("armci_mpi2_spawn: malloc failed.", 0);
       }

       for(i=0; i<armci_nserver; i++)
       {
          hostname_arr[i] = (char*)malloc(MPI_MAX_PROCESSOR_NAME*sizeof(char));
       }
    }
    
    /**
     * 1. The root process collects the hostnames (i.e. machine names) of the
     * nodes where data servers are to be spawned. The ARMCI master of each
     * node returns its hostname.
     */
    armci_gather_hostnames(hostname_arr);
    
       
    /** 2. initialize MPI_Comm_spawn_multiple() arguments */
    {   
       for(i=0; i<armci_nserver; i++)
       {
          command_arr[i] = (*_armci_argv)[0];  /*CHECK: path needs fix */
          size_arr[i]    = 1;                /* 1 data server in each node */
          MPI_Info_create(&info_arr[i]);
#ifdef SPAWN_CRAY_XT
          asprintf(&nid_arr[i], "%d", atoi((hostname_arr[i] + 3)));
          MPI_Info_set(info_arr[i], "host", nid_arr[i]); /*portability? */
#else
          MPI_Info_set(info_arr[i], "host", hostname_arr[i]); /*portability? */
#endif
       }
    }

    
    /**
     * 3. MPI_Comm_spawn_multiple(): This is a collective call.
     * Intercommunicator "ds_intercomm" contains only new dataserver processes.
     */
    MPI_Check(
       MPI_Comm_spawn_multiple(armci_nserver, command_arr, MPI_ARGVS_NULL,
                               size_arr, info_arr, ARMCI_ROOT, MPI_COMM_WORLD,
                               &MPI_COMM_CLIENT2SERVER, MPI_ERRCODES_IGNORE)
       );


    {  
       for(i=0; i<armci_nserver; i++)  free(hostname_arr[i]);
       
       free(command_arr);
       free(size_arr);
       free(info_arr);
       free(hostname_arr);
#ifdef SPAWN_CRAY_XT
       free(nid_arr);
#endif
    }
}
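
/*
 * Illustrative sketch, not part of the original source: a process created by
 * MPI_Comm_spawn_multiple() reaches its parents through MPI_Comm_get_parent().
 * This is how a spawned data server would typically obtain the counterpart of
 * MPI_COMM_CLIENT2SERVER set up above; the function name is hypothetical and
 * MPI_Init is assumed to have been called already in the server's main().
 */
static MPI_Comm example_server_get_parent_comm(void)
{
    MPI_Comm server2client = MPI_COMM_NULL;

    MPI_Comm_get_parent(&server2client);
    if (server2client == MPI_COMM_NULL)
        armci_die("data server was not started via MPI_Comm_spawn", 0);

    /* requests from clients arrive on this intercommunicator */
    return server2client;
}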