Example #1
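/*\ client issues a vector request and moves the vector data directly over
 *  the server socket (receive for GET, send for PUT), avoiding an extra copy
\*/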
int armci_direct_vector(request_header_t *msginfo , armci_giov_t darr[],
       int len, int proc){
int bufsize=0,bytes=0,s;
    for(s=0; s<len; s++){
        bytes   += darr[s].ptr_array_len * darr[s].bytes;/* data */
        bufsize += darr[s].ptr_array_len *sizeof(void*)+2*sizeof(int);/*descr*/
    }
    bufsize += bytes + sizeof(long) + 2*sizeof(double) + 8;
    if(msginfo->operation==GET)
       bufsize = msginfo->dscrlen+sizeof(request_header_t);
    if(msginfo->operation==PUT){
        msginfo->datalen=0;
        msginfo->bytes=msginfo->dscrlen;
        bufsize=msginfo->dscrlen+sizeof(request_header_t);
    }
    armci_send_req(proc, msginfo, bufsize);
    if(msginfo->operation==GET){
       bytes=armci_RecvVectorFromSocket(SRV_sock[armci_clus_id(proc)],darr,len,
                 (struct iovec *)((char*)(msginfo+1)+msginfo->dscrlen) );
    }
    if(msginfo->operation==PUT){
       bytes=armci_SendVectorToSocket(SRV_sock[armci_clus_id(proc)],darr,len,
                 (struct iovec *)((char*)(msginfo+1)+msginfo->dscrlen) );
    }
    return(bytes);
}
Example #2
File: groups.c Project: arnolda/scafacos
static void get_group_clus_id(ARMCI_iGroup *igroup, int grp_nproc, 
                              int *grp_clus_id) {
    int i, *ranks1, *ranks2;
#ifdef ARMCI_GROUP
    assert(grp_nproc<=igroup->grp_attr.nproc);
    for(i=0; i<grp_nproc; i++) {
      grp_clus_id[i] = armci_clus_id(igroup->grp_attr.proc_list[i]);
    }
#else
    MPI_Group group2;
    
    /* Takes the list of processes from one group and attempts to determine
     * the corresponding ranks in a second group (here, MPI_COMM_WORLD) */

    ranks1 = (int *)malloc(2*grp_nproc*sizeof(int));
    ranks2 = ranks1 + grp_nproc;
    for(i=0; i<grp_nproc; i++) ranks1[i] = i;
    MPI_Comm_group(MPI_COMM_WORLD, &group2);
    MPI_Group_translate_ranks(igroup->igroup, grp_nproc, ranks1, group2, ranks2);
    
    /* get the clus_id of processes */
    for(i=0; i<grp_nproc; i++) grp_clus_id[i] = armci_clus_id(ranks2[i]);
    free(ranks1);
#endif
}
Example #3
File: elan.c Project: arnolda/scafacos
/*\ send request to server thread
\*/
int armci_send_req_msg(int proc, void *vbuf, int len)
{
    char *buf = (char*)vbuf;
    request_header_t *msginfo = (request_header_t *)buf;
    int cluster = armci_clus_id(proc);
    int size=_ELAN_SLOTSIZE;
    int proc_serv = armci_clus_info[cluster].master;

    ops_pending_ar[cluster]++;

    if((msginfo->dscrlen+msginfo->datalen)> MSG_DATA_LEN){
      /* set message tag -> has pointer to client buffer with descriptor+data */
      msginfo->tag = (void *)(buf + sizeof(request_header_t));
      if(DEBUG_){ printf("%d:in send &tag=%p tag=%p\n",armci_me,&msginfo->tag,
                msginfo->tag); fflush(stdout);
      }
    } /* else: a null tag means the buffer is free -- true after elan_queueReq */

    elan_queueReq(mq, proc_serv, vbuf, size); /* vbuf is sent/copied out */
    
#if 0
    if(armci_me==0){
      printf("%d sent request %d to (%d,%d)\n",armci_me,ops_pending_ar[proc], 
      proc,proc_serv); fflush(stdout);
    }
#endif

    return 0;
}
/*\ client receives data from server
\*/
char *armci_ReadFromDirect (int proc, request_header_t *msginfo, int len)
{

    int server;
    int clus_id = armci_clus_id(proc);
    MPI_Status status;

    server = armci_clus_info[clus_id].master;

    armci_mpi2_debug(armci_me, "armci_ReadFromDirect: proc=%d, server=%d, "
                     "msginfo=%p, bytes=%d (op=%d)\n", proc, server, msginfo,
                     len, msginfo->operation);
    MPI_Check(
        MPI_Recv(msginfo + 1, len, MPI_BYTE, server, ARMCI_MPI_SERVER2CLIENT_TAG,
                 ARMCI_COMM_WORLD, &status)
    );


    armci_mpi2_debug(armci_me, "recv msg from server(%d), fwd by client %d\n",
                     server, proc);

    {
        int count;
        MPI_Get_count(&status, MPI_BYTE, &count);
        if (count != len)
        {
            armci_mpi2_debug(armci_me, "armci_ReadFromDirect: got %d bytes, "
                             "expected %d bytes\n", count, len);
            armci_die("armci_ReadFromDirect: MPI_Recv failed.", count);
        }
    }

    return (char *) (msginfo+1);
}
/*\ client sends strided data + request to server
\*/
int armci_send_req_msg_strided(int proc, request_header_t *msginfo,char *ptr,
                               int strides, int stride_arr[], int count[])
{
    int server;
    int clus_id = armci_clus_id(proc);
    int bytes;

    /* Abhinav Vishnu */
    server = armci_clus_info[clus_id].master;

    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided: proc=%d server=%d "
                     "bytes=%d (op=%d)\n", proc, server, msginfo->datalen,
                     msginfo->operation);


    /* we write header + descriptor of strided data  */
    bytes = sizeof(request_header_t) + msginfo->dscrlen;
    armci_send_req_msg(proc, msginfo, bytes);

    {
        /* for larger blocks write directly thus avoiding memcopy */
        armci_mpi_strided_c2s(SEND, ptr, strides, stride_arr, count, server,
                              ARMCI_COMM_WORLD);
    }


    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided(): send msg to "
                     "server(%d), to fwd to client %d\n", server, proc);

    return 0;
}
Example #6
File: memlock.c Project: fweik/scafacos
/*\ simple locking scheme that ignores addresses
\*/
void armci_lockmem_(void *pstart, void *pend, int proc)
{
#ifdef BGML
    bgml_lockmem(pstart, pend, proc);
#else

#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

    if(DEBUG_) {
        printf("%d: armci_lockmem_ proc=%d lock=%d\n",armci_me,proc,lock);
        fflush(stdout);
    }

    NATIVE_LOCK(lock,proc);
#   ifdef LAPI
    {
        extern int kevin_ok;
        kevin_ok=0;
    }
#   endif
    if(DEBUG_) {
        printf("%d: armci_lockmem_ done\n",armci_me);
        fflush(stdout);
    }
#endif
}
Example #7
/*\ client receives strided data from server
\*/
void armci_ReadStridedFromDirect(int proc, request_header_t* msginfo,
                                 void *ptr, int strides, int stride_arr[],
                                 int count[])
{

    int server=armci_clus_id(proc);
    
    armci_mpi2_debug(armci_me, "armci_ReadStridedFromDirect: proc=%d "
                     "stride_levels=%d, server=%d bytes=%d (op=%d)\n",
                     proc, strides, server, msginfo->datalen,
                     msginfo->operation);

    
    if( !(server >= 0 && server < armci_nserver) )
       armci_die("armci_ReadStridedFromDirect: Invalid server.", 0);

#ifdef MPI_USER_DEF_DATATYPE
    if(strides > 0) 
    {
       armci_mpi_strided2(RECV, ptr, strides, stride_arr, count, server,
                          MPI_COMM_CLIENT2SERVER);
    }
    else
#endif
    {
       armci_mpi_strided(RECV, ptr, strides, stride_arr, count, server,
                         MPI_COMM_CLIENT2SERVER);
    }
}
Example #8
/*\ client sends strided data + request to server
\*/
int armci_send_req_msg_strided(int proc, request_header_t *msginfo,char *ptr,
                               int strides, int stride_arr[], int count[])
{
int cluster = armci_clus_id(proc);
int stat, bytes;

    if(DEBUG_){
      printf("%d:armci_send_req_msg_strided: op=%d to=%d bytes= %d \n",armci_me,
             msginfo->operation,proc,msginfo->datalen);
      fflush(stdout);
    }

    /* we write header + data descriptor */
    bytes = sizeof(request_header_t) + msginfo->dscrlen;

    THREAD_LOCK(armci_user_threads.net_lock);

    stat = armci_WriteToSocket(SRV_sock[cluster], msginfo, bytes);
    if(stat<0)armci_die("armci_send_strided:write failed",stat);
#if defined(USE_SOCKET_VECTOR_API)
    if(msginfo->operation==PUT && msginfo->datalen==0)
        armci_SendStridedToSocket( SRV_sock[cluster],ptr,stride_arr,count,
             strides,(struct iovec *)(msginfo+1) );
    else
#endif
    /* for larger blocks write directly to socket thus avoiding memcopy */
    armci_write_strided_sock(ptr, strides,stride_arr,count,SRV_sock[cluster]);

    THREAD_UNLOCK(armci_user_threads.net_lock);

    return 0;
}
Example #9
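/*\ block until all outstanding operations issued to "proc" have completed
\*/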
void PARMCI_Fence(int proc)
{
int i;

#if defined(DATA_SERVER) && !(defined(GM) && defined(ACK_FENCE))
//   printf("%d [cp] fence_arr(%d)=%d\n",armci_me,proc,FENCE_ARR(proc));
     if(FENCE_ARR(proc) && (armci_nclus >1)){

           int cluster = armci_clus_id(proc);
           int master=armci_clus_info[cluster].master;

           armci_rem_ack(cluster);

           /* one ack per cluster node suffices */
           /* note, in multi-threaded case it will only clear for current thread */
           bzero(&FENCE_ARR(master),armci_clus_info[cluster].nslave);
     }
#elif defined(BGML)
     BGML_WaitProc(proc);
     MEM_FENCE;
#else
     FENCE_NODE(proc);
     MEM_FENCE;
#endif
}
Example #10
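/*\ request an ack from the cluster node hosting "proc" so that operations
 *  pending there are flushed
\*/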
void ARMCI_DoFence(int proc)
{
int i;
  if(!SAMECLUSNODE(proc) && (armci_nclus >1)){
  int cluster = armci_clus_id(proc);
    armci_rem_ack(cluster);
  }
}
Example #11
File: gpc.c Project: bcernohous/ga
/*\ release lock in a callback function executed in context of processor "proc"
\*/
void ARMCI_Gpc_unlock(int proc)
{
#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif
    NATIVE_UNLOCK(lock,proc);
}
Example #12
/* return domain ID of the specified process */
int armci_domain_id(armci_domain_t domain, int glob_proc_id)
{
    int id = glob_proc_id;

    if(id <0 || id >= armci_nproc)  {
        armci_die2("armci domain error",id,armci_nproc);
    }

    return armci_clus_id(glob_proc_id);
}
Example #13
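/*\ client receives data from server
\*/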
char *armci_ReadFromDirect(int proc, request_header_t * msginfo, int len)
{
int cluster=armci_clus_id(proc);
int stat;

    if(DEBUG_){
      printf("%d:armci_ReadFromDirect:  from %d \n",armci_me,proc);
      fflush(stdout);
    }
    stat =armci_ReadFromSocket(SRV_sock[cluster],msginfo+1,len);
    if(stat<0)armci_die("armci_rcv_data: read failed",stat);
    return(char*)(msginfo+1);
}
Example #14
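/*\ release the memory lock held for process "proc" (counterpart of armci_lockmem_)
\*/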
void armci_unlockmem_(int proc)
{

#if defined(CLUSTER) && !defined(SGIALTIX) 
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif
    if(DEBUG_){
      printf("%d: armci_unlockmem_ proc=%d lock=%d\n",armci_me,proc,lock);
      fflush(stdout);
    }
     NATIVE_UNLOCK(lock,proc);
}
Example #15
File: rmw.c Project: dmlb2000/nwchem-cml
void armci_generic_rmw(int op, void *ploc, void *prem, int extra, int proc)
{
#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

    ARMCI_PR_DBG("enter",0);
    NATIVE_LOCK(lock,proc);
    switch (op) {
      case ARMCI_FETCH_AND_ADD:
                armci_get(prem,ploc,sizeof(int),proc);
                _a_temp = *(int*)ploc + extra;
                armci_put(&_a_temp,prem,sizeof(int),proc);
           break;
      case ARMCI_FETCH_AND_ADD_LONG:
                armci_get(prem,ploc,sizeof(long),proc);
                _a_ltemp = *(long*)ploc + extra;
                armci_put(&_a_ltemp,prem,sizeof(long),proc);
           break;
      case ARMCI_SWAP:
#if (defined(__i386__) || defined(__x86_64__))
        if(SERVER_CONTEXT || armci_nclus==1){
	  atomic_exchange(ploc, prem, sizeof(int));
        }
        else 
#endif
        {
	  armci_get(prem,&_a_temp,sizeof(int),proc);
	  armci_put(ploc,prem,sizeof(int),proc);
	  *(int*)ploc = _a_temp; 
        }
	break;
      case ARMCI_SWAP_LONG:
                armci_get(prem,&_a_ltemp,sizeof(long),proc);
                armci_put(ploc,prem,sizeof(long),proc);
                *(long*)ploc = _a_ltemp;
           break;
      default: armci_die("rmw: operation not supported",op);
    }
    /*TODO memfence here*/
    NATIVE_UNLOCK(lock,proc);
    ARMCI_PR_DBG("exit",0);
}
Example #16
/*\ client sends request message to server
\*/
int armci_send_req_msg(int proc, void *buf, int bytes)
{
    int cluster = armci_clus_id(proc);
    request_header_t* msginfo = (request_header_t*)buf;
    int idx, rc;

    THREAD_LOCK(armci_user_threads.net_lock);

    /* mark sockets as active (only if reply is expected?) */
    idx = _armci_buf_to_index(msginfo);
    _armci_active_socks->socks[idx] = SRV_sock[cluster];

    rc = (armci_WriteToSocket(SRV_sock[cluster], buf, bytes) < 0);

    THREAD_UNLOCK(armci_user_threads.net_lock);

    return rc;
}
Example #17
/*\ client receives strided data from server
\*/
void armci_ReadStridedFromDirect(int proc, request_header_t* msginfo, void *ptr,
                                 int strides, int stride_arr[], int count[])
{
int cluster=armci_clus_id(proc);

    if(DEBUG_){
      printf("%d:armci_ReadStridedFromDirect:  from %d \n",armci_me,proc);
      fflush(stdout);
    }
 
#if defined(USE_SOCKET_VECTOR_API)
    if(msginfo->operation==GET && strides > 0)
        armci_RecvStridedFromSocket( SRV_sock[cluster],ptr,stride_arr,count,
             strides,(struct iovec *)((char*)(msginfo+1)+msginfo->dscrlen));
    else
#endif

    armci_read_strided_sock(ptr, strides, stride_arr, count, SRV_sock[cluster]);
}
Example #18
File: memlock.c Project: fweik/scafacos
void armci_unlockmem_(int proc)
{
#ifdef BGML
    bgml_unlockmem(proc);
#else

#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif
    NATIVE_UNLOCK(lock,proc);
#   ifdef LAPI
    {
        extern int kevin_ok;
        kevin_ok=1;
    }
#   endif
#endif
}
Example #19
/*\ client sends request message to server
\*/
int armci_send_req_msg (int proc, void *buf, int bytes)
{
    int clus_id = armci_clus_id(proc);
    int server ;

    /* Abhinav Vishnu */
    server = armci_clus_info[clus_id].master;

    armci_mpi2_debug(armci_me, "armci_send_req_msg(): proc=%d, server=%d, "
                     "buf=%p, bytes=%d\n", proc, server, buf, bytes);

    MPI_Check(
        MPI_Send(buf, bytes, MPI_BYTE, server, ARMCI_MPI_CLIENT2SERVER_TAG,
                 ARMCI_COMM_WORLD)
    );
    armci_mpi2_debug(armci_me, "armci_send_req_msg(): send msg to server(%d), to"
                     "fwd to client %d\n", server, proc);

    return 0;
}
Example #20
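/*\ spin until the server has acknowledged every operation pending for the
 *  cluster node that hosts "p"
\*/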
File: elan.c Project: arnolda/scafacos
void armci_elan_fence(int p)
{
    long loop=0;
    int cluster = armci_clus_id(p);
    ops_t *buf = armci_elan_fence_arr[armci_me] + cluster;
    long res = ops_pending_ar[cluster] - armci_check_int_val(buf);

#if 0
    if(ops_pending_ar[cluster])
    printf("%d: client fencing proc=%d fence=%p slot %p pending=%d got=%d\n", 
           armci_me, p, armci_elan_fence_arr[armci_me], buf, 
           ops_pending_ar[cluster], armci_check_int_val(buf)); fflush(stdout);
#endif

    while(res){
       if(++loop == 1000) { loop=0; usleep(1);  }
       armci_util_spin(loop, buf);
       res = ops_pending_ar[cluster] - armci_check_int_val(buf);
    }
}
Example #21
/*\ client receives strided data from server
\*/
void armci_ReadStridedFromDirect(int proc, request_header_t* msginfo,
                                 void *ptr, int strides, int stride_arr[],
                                 int count[])
{

    int server;
    int clus_id = armci_clus_id(proc);

    /* Abhinav Vishnu */
    server = armci_clus_info[clus_id].master;

    armci_mpi2_debug(armci_me, "armci_ReadStridedFromDirect: proc=%d "
                     "stride_levels=%d, server=%d bytes=%d (op=%d)\n",
                     proc, strides, server, msginfo->datalen,
                     msginfo->operation);

    {
        armci_mpi_strided_c2s(RECV, ptr, strides, stride_arr, count, server,
                              ARMCI_COMM_WORLD);
    }
}
Example #22
/*\ client sends request message to server
\*/
int armci_send_req_msg (int proc, void *buf, int bytes)
{
  int server = armci_clus_id(proc);

  armci_mpi2_debug(armci_me, "armci_send_req_msg(): proc=%d, server=%d, "
                   "buf=%p, bytes=%d\n", proc, server, buf, bytes);
  
  if( !(server >= 0 && server < armci_nserver) )
     armci_die("armci_send_req_msg: Invalid server.", 0);

#ifdef MULTIPLE_BUFS
  /**
   * Sequentially ordered tags to ensure flow control at the server side.
   * For example, a put followed by get from a client should be processed in
   * ORDER at the server side. Without this flow control, the server might
   * process the get request before the put (and thus violate ARMCI's
   * ordering semantics).
   */
  ((request_header_t*)buf)->tag = _armci_mpi_tag[server];
  MPI_Check(
     MPI_Send(buf, bytes, MPI_BYTE, server, ARMCI_MPI_SPAWN_TAG,
              MPI_COMM_CLIENT2SERVER)
     );

  _armci_mpi_tag[server]++;
  if(_armci_mpi_tag[server] > ARMCI_MPI_SPAWN_TAG_END) 
     _armci_mpi_tag[server] = ARMCI_MPI_SPAWN_TAG_BEGIN;
  
#else
  MPI_Check(
     MPI_Send(buf, bytes, MPI_BYTE, server, ARMCI_MPI_SPAWN_TAG,
              MPI_COMM_CLIENT2SERVER)
     );
#endif
  armci_mpi2_debug(armci_me, "armci_send_req_msg(): send msg to server(%d), to"
                   "fwd to client %d\n", server, proc);

  return 0;
}
Example #23
File: fence.c Project: dmlb2000/nwchem-cml
void PARMCI_Fence(int proc)
{
#if defined(DATA_SERVER) && !(defined(GM) && defined(ACK_FENCE))
     if(FENCE_ARR(proc) && (armci_nclus >1)){

           int cluster = armci_clus_id(proc);
           int master = armci_clus_info[cluster].master;

           armci_rem_ack(cluster);

           bzero(&FENCE_ARR(master),
                   armci_clus_info[cluster].nslave);
     }
#elif defined(ARMCIX)
     ARMCIX_Fence (proc);
#elif defined(BGML)
     BGML_WaitProc(proc);
     MEM_FENCE;
#else
     FENCE_NODE(proc);
     MEM_FENCE;
#endif
}
Example #24
/*\ client receives data from server
\*/
char *armci_ReadFromDirect (int proc, request_header_t *msginfo, int len)
{

    int server = armci_clus_id(proc);
    MPI_Status status;

    armci_mpi2_debug(armci_me, "armci_ReadFromDirect: proc=%d, server=%d, "
                     "msginfo=%p, bytes=%d (op=%d)\n", proc, server, msginfo,
                     len, msginfo->operation);
    
    if( !(server >= 0 && server < armci_nserver) )
       armci_die("armci_ReadFromDirect: Invalid server.", 0);
    
    MPI_Check(
       MPI_Recv(msginfo + 1, len, MPI_BYTE, server, ARMCI_MPI_SPAWN_TAG,
                MPI_COMM_CLIENT2SERVER, &status)
       );

    
    armci_mpi2_debug(armci_me, "recv msg from server(%d), fwd by client %d\n",
                     server, proc);

#if MPI_SPAWN_DEBUG
    {
       int count;
       MPI_Get_count(&status, MPI_BYTE, &count);
       if (count != len) 
       {
          armci_mpi2_debug(armci_me, "armci_ReadFromDirect: got %d bytes, "
                           "expected %d bytes\n", count, len);
          armci_die("armci_ReadFromDirect: MPI_Recv failed.", count);
       }
    }
#endif
    
 return (char *) (msginfo+1);
}
Example #25
/*\ client sends strided data + request to server
\*/
int armci_send_req_msg_strided(int proc, request_header_t *msginfo,char *ptr,
                               int strides, int stride_arr[], int count[])
{
    int server = armci_clus_id(proc);
    int bytes;

    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided: proc=%d server=%d "
                     "bytes=%d (op=%d)\n", proc, server, msginfo->datalen,
                     msginfo->operation);

    THREAD_LOCK(armci_user_threads.net_lock);

    /* we write header + descriptor of strided data  */
    bytes = sizeof(request_header_t) + msginfo->dscrlen;
    armci_send_req_msg(proc, msginfo, bytes);
    
#ifdef MPI_USER_DEF_DATATYPE
    if(strides>0) 
    {
       armci_mpi_strided2(SEND, ptr, strides, stride_arr, count, server,
                          MPI_COMM_CLIENT2SERVER);
    }
    else
#endif
    {
       /* for larger blocks write directly thus avoiding memcopy */
       armci_mpi_strided(SEND, ptr, strides, stride_arr, count, server,
                         MPI_COMM_CLIENT2SERVER);
    }
       
    THREAD_UNLOCK(armci_user_threads.net_lock);

    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided(): send msg to "
                     "server(%d), to fwd to client %d\n", server, proc);

    return 0;
}
Example #26
File: memlock.c Project: fweik/scafacos
/*\ acquire exclusive LOCK to MEMORY area <pstart,pend> owned by process "proc"
 *   . only one area can be locked at a time by the calling process
 *   . must unlock it with armci_unlockmem
\*/
void armci_lockmem(void *start, void *end, int proc)
{
#ifdef ARMCIX
    ARMCIX_Lockmem (start, end, proc);
#else
    register void* pstart, *pend;
    register  int slot, avail=0;
    int turn=0, conflict=0;
    memlock_t *memlock_table;
#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

#ifdef CORRECT_PTR
    if(! *armci_use_memlock_table) {
        /* if offset invalid, use dumb locking scheme ignoring addresses */
        armci_lockmem_(start, end, proc);
        return;
    }

#  ifndef SGIALTIX
    /* when processes are attached to a shmem region at different addresses,
     * addresses written to memlock table must be adjusted to the node master
     */
    if(armci_mem_offset) {
        start = armci_mem_offset + (char*)start;
        end   = armci_mem_offset + (char*)end;
    }
#  endif
#endif

    if(DEBUG_) {
        printf("%d: calling armci_lockmem for %d range %p -%p\n",
               armci_me, proc, start,end);
        fflush(stdout);
    }
    memlock_table = (memlock_t*)memlock_table_array[proc];


#ifdef ALIGN_ADDRESS
    /* align address range on cache line boundary to avoid false sharing */
    pstart = ALIGN_ADDRESS(start);
    pend = CALGN -1 + ALIGN_ADDRESS(end);
#else
    pstart=start;
    pend =end;
#endif

#ifdef CRAY_SHMEM
    {   /* adjust according the remote process raw address */
        long bytes = (long) ((char*)pend-(char*)pstart);
        extern void* armci_shmalloc_remote_addr(void *ptr, int proc);
        pstart = armci_shmalloc_remote_addr(pstart, proc);
        pend   = (char*)pstart + bytes;
    }
#endif
#ifdef SGIALTIX
    if (proc == armci_me) {
        pstart = shmem_ptr(pstart,armci_me);
        pend = shmem_ptr(pend,armci_me);
    }
    /* In SGI Altix  processes are attached to a shmem region at different
    addresses. Addresses written to memlock table must be adjusted to
    the node master
     */
    if(ARMCI_Uses_shm()) {
        int i, seg_id=-1;
        size_t tile_size,offset;
        void *start_addr, *end_addr;
        for(i=0; i<seg_count; i++) {
            tile_size = armci_memoffset_table[i].tile_size;
            start_addr = (void*) ((char*)armci_memoffset_table[i].seg_addr +
                                  proc*tile_size);
            end_addr = (void*) ((char*)start_addr +
                                armci_memoffset_table[i].seg_size);
            /* CHECK: because armci_lockmem_patch in strided.c can span too
             * wide a range, the stricter condition below cannot be used: */
            /*if(pstart>=start_addr && pend<=end_addr) {seg_id=i; break;}*/
            if(pstart >= start_addr && pstart <= end_addr) {
                seg_id=i;
                break;
            }
        }
        if(seg_id==-1) armci_die("armci_lockmem: Invalid segment", seg_id);

        offset = armci_memoffset_table[seg_id].mem_offset;
        pstart = ((char*)pstart + offset);
        pend = ((char*)pend + offset);
    }
#endif

    while(1) {
        NATIVE_LOCK(lock,proc);

        armci_get(memlock_table, table, sizeof(table), proc);
        /*        armci_copy(memlock_table, table, sizeof(table));*/

        /* inspect the table */
        conflict = 0;
        avail =-1;
        for(slot = 0; slot < MAX_SLOTS; slot ++) {

            /* nonzero starting address means the slot is occupied */
            if(table[slot].start == NULL) {

                /* remember a free slot to store address range */
                avail = slot;

            } else {
                /*check for conflict: overlap between stored and current range*/
                if(  (pstart >= table[slot].start && pstart <= table[slot].end)
                        || (pend >= table[slot].start && pend <= table[slot].end) ) {

                    conflict = 1;
                    break;

                }
                /*
                printf("%d: locking %ld-%ld (%d) conflict\n",
                    armci_me,  */
            }
        }

        if(avail != -1 && !conflict) break;

        NATIVE_UNLOCK(lock,proc);
        armci_waitsome( ++turn );

    }

    /* we got the memory lock: enter address into the table */
    table[avail].start = pstart;
    table[avail].end = pend;
    armci_put(table+avail,memlock_table+avail,sizeof(memlock_t),proc);

    FENCE_NODE(proc);

    NATIVE_UNLOCK(lock,proc);
    locked_slot = avail;
#endif /* ! ARMCIX */
}
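The locking discipline described in the header comment (lock one address range, operate on it, then unlock) can be illustrated with a minimal sketch. This is a hypothetical usage example, not part of the original source: the helper name update_remote_counter is invented, the end-address convention passed to armci_lockmem is an assumption, and armci_unlockmem is assumed to take only the owning process id, as its counterpart armci_unlockmem_(int proc) in this listing does.

/* Hypothetical sketch: increment an int in proc's memory under the memory
 * lock by bracketing a get/put pair.  Assumes "remote" was allocated through
 * ARMCI so that it is covered by the memlock table. */
static void update_remote_counter(int *remote, int proc)
{
    int val;

    /* lock the byte range [remote, remote+sizeof(int)-1] on "proc";
     * the exact end-address convention is an assumption here */
    armci_lockmem(remote, (char*)remote + sizeof(int) - 1, proc);

    armci_get(remote, &val, sizeof(int), proc);   /* fetch current value */
    val++;
    armci_put(&val, remote, sizeof(int), proc);   /* write it back */

    armci_unlockmem(proc);   /* release the single lock held by this process */
}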
Example #27
/*\ acquire exclusive LOCK to MEMORY area <pstart,pend> owned by process "proc"
 *   . only one area can be locked at a time by the calling process
 *   . must unlock it with armci_unlockmem
\*/
void armci_lockmem(void *start, void *end, int proc)
{
     register void* pstart, *pend;
     register  int slot, avail=0;
     int turn=0, conflict=0;
     memlock_t *memlock_table;
#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

#ifdef CORRECT_PTR
     if(! *armci_use_memlock_table){
       /* if offset invalid, use dumb locking scheme ignoring addresses */
       armci_lockmem_(start, end, proc); 
       return;
     }

#  ifndef SGIALTIX
     /* when processes are attached to a shmem region at different addresses,
      * addresses written to memlock table must be adjusted to the node master
      */
     if(armci_mem_offset){
        start = armci_mem_offset + (char*)start;
        end   = armci_mem_offset + (char*)end;
     }
#  endif
#endif

     if(DEBUG_){
       printf("%d: calling armci_lockmem for %d range %p -%p\n",
              armci_me, proc, start,end);
       fflush(stdout);
     }
     memlock_table = (memlock_t*)memlock_table_array[proc];


#ifdef ALIGN_ADDRESS
     /* align address range on cache line boundary to avoid false sharing */
     pstart = ALIGN_ADDRESS(start);
     pend = CALGN -1 + ALIGN_ADDRESS(end);
#else
     pstart=start;
     pend =end;
#endif

#ifdef CRAY_SHMEM
     { /* adjust according the remote process raw address */
        long bytes = (long) ((char*)pend-(char*)pstart);
        extern void* armci_shmalloc_remote_addr(void *ptr, int proc);
        pstart = armci_shmalloc_remote_addr(pstart, proc);
        pend   = (char*)pstart + bytes;
     }
#endif
     while(1){
        NATIVE_LOCK(lock,proc);

        armci_get(memlock_table, table, sizeof(table), proc);
/*        armci_copy(memlock_table, table, sizeof(table));*/
        
        /* inspect the table */
        conflict = 0; avail =-1;
        for(slot = 0; slot < MAX_SLOTS; slot ++){

            /* nonzero starting address means the slot is occupied */ 
            if(table[slot].start == NULL){

              /* remember a free slot to store address range */
              avail = slot;  
          
            }else{
              /*check for conflict: overlap between stored and current range*/
              if(  (pstart >= table[slot].start && pstart <= table[slot].end)
                 || (pend >= table[slot].start && pend <= table[slot].end) ){

                  conflict = 1;
                  break;

              }
              /*
              printf("%d: locking %ld-%ld (%d) conflict\n",
                  armci_me,  */
            }
       }
        
       if(avail != -1 && !conflict) break;

       NATIVE_UNLOCK(lock,proc);
       armci_waitsome( ++turn );

     }

     /* we got the memory lock: enter address into the table */
     table[avail].start = pstart;
     table[avail].end = pend;
     armci_put(table+avail,memlock_table+avail,sizeof(memlock_t),proc);

     FENCE_NODE(proc);

     NATIVE_UNLOCK(lock,proc);
     locked_slot = avail;

}
Example #28
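/*\ client receives GET vector data directly from the server socket
\*/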
int armci_direct_vector_get(request_header_t *msginfo , armci_giov_t darr[],
               int len, int proc)
{
    return armci_RecvVectorFromSocket(SRV_sock[armci_clus_id(proc)],darr,len,
                (struct iovec *)((char*)(msginfo+1)+msginfo->dscrlen) );
}