/*\ client sends a vector request and, for GET/PUT, moves the data directly
 *  through the server socket
 *
 *  msginfo - request header; the iovec scratch area handed to the socket
 *            calls is taken at (msginfo+1) + msginfo->dscrlen
 *  darr    - array of len vector descriptors
 *  proc    - target process; its cluster id selects the SRV_sock entry
 *
 *  Returns the result of the socket vector call for GET or PUT; for any
 *  other operation, the payload size summed over darr.
\*/
int armci_direct_vector(request_header_t *msginfo , armci_giov_t darr[],
                        int len, int proc)
{
    int req_len = 0;    /* length passed to armci_send_req */
    int nbytes = 0;     /* payload size, later the socket call result */
    int i;

    for (i = 0; i < len; i++) {
        nbytes  += darr[i].ptr_array_len * darr[i].bytes;                  /* data  */
        req_len += darr[i].ptr_array_len * sizeof(void*) + 2*sizeof(int);  /* descr */
    }
    req_len += nbytes + sizeof(long) + 2*sizeof(double) + 8;

    /* for GET and PUT the request carries only header + descriptor */
    if (msginfo->operation == GET) {
        req_len = msginfo->dscrlen + sizeof(request_header_t);
    }
    if (msginfo->operation == PUT) {
        msginfo->datalen = 0;
        msginfo->bytes   = msginfo->dscrlen;
        req_len = msginfo->dscrlen + sizeof(request_header_t);
    }

    armci_send_req(proc, msginfo, req_len);

    if (msginfo->operation == GET) {
        nbytes = armci_RecvVectorFromSocket(
                     SRV_sock[armci_clus_id(proc)], darr, len,
                     (struct iovec *)((char*)(msginfo+1) + msginfo->dscrlen));
    }
    if (msginfo->operation == PUT) {
        nbytes = armci_SendVectorToSocket(
                     SRV_sock[armci_clus_id(proc)], darr, len,
                     (struct iovec *)((char*)(msginfo+1) + msginfo->dscrlen));
    }

    return nbytes;
}
/*\ fill grp_clus_id[0..grp_nproc-1] with the cluster id of each group member
 *
 *  igroup      - group whose members are looked up
 *  grp_nproc   - number of members to process
 *  grp_clus_id - output array with at least grp_nproc entries
 *
 *  With ARMCI_GROUP the process ids are read from the group's own
 *  proc_list.  Otherwise group ranks 0..grp_nproc-1 are translated to
 *  ranks in the group of MPI_COMM_WORLD, and those are mapped to cluster ids.
\*/
static void get_group_clus_id(ARMCI_iGroup *igroup, int grp_nproc,
                              int *grp_clus_id)
{
    int i;

#ifdef ARMCI_GROUP
    assert(grp_nproc<=igroup->grp_attr.nproc);
    for(i=0; i<grp_nproc; i++) {
        grp_clus_id[i] = armci_clus_id(igroup->grp_attr.proc_list[i]);
    }
#else
    MPI_Group group2;
    int *ranks1, *ranks2;

    if(grp_nproc <= 0) return;   /* nothing to translate */

    /* Takes the list of processes from one group and attempts to determine
     * the corresponding ranks in a second group (here, MPI_COMM_WORLD) */
    ranks1 = (int *)malloc(2*(size_t)grp_nproc*sizeof(int));
    if(ranks1 == NULL) {
        /* cannot continue without the scratch arrays */
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    ranks2 = ranks1 + grp_nproc;   /* second half holds the translated ranks */
    for(i=0; i<grp_nproc; i++) ranks1[i] = i;

    MPI_Comm_group(MPI_COMM_WORLD, &group2);
    MPI_Group_translate_ranks(igroup->igroup, grp_nproc, ranks1, group2, ranks2);

    /* get the clus_id of processes */
    for(i=0; i<grp_nproc; i++) grp_clus_id[i] = armci_clus_id(ranks2[i]);

    /* the handle returned by MPI_Comm_group must be released by the caller */
    MPI_Group_free(&group2);
    free(ranks1);
#endif
}
/*\ send request to server thread \*/ int armci_send_req_msg(int proc, void *vbuf, int len) { char *buf = (char*)vbuf; request_header_t *msginfo = (request_header_t *)buf; int cluster = armci_clus_id(proc); int size=_ELAN_SLOTSIZE; int proc_serv = armci_clus_info[cluster].master; ops_pending_ar[cluster]++; if((msginfo->dscrlen+msginfo->datalen)> MSG_DATA_LEN){ /* set message tag -> has pointer to client buffer with descriptor+data */ msginfo->tag = (void *)(buf + sizeof(request_header_t)); if(DEBUG_){ printf("%d:in send &tag=%p tag=%p\n",armci_me,&msginfo->tag, msginfo->tag); fflush(stdout); } } else /* null tag means buffer is free -- true after elan_queueReq*/; elan_queueReq(mq, proc_serv, vbuf, size); /* vbuf is sent/copied out */ #if 0 if(armci_me==0){ printf("%d sent request %d to (%d,%d)\n",armci_me,ops_pending_ar[proc], proc,proc_serv); fflush(stdout); } #endif return 0; }
/*\ client receives data from server
 *
 *  proc    - process the request was addressed to; the data is received
 *            from the master of its cluster
 *  msginfo - request header; the payload is stored right after it
 *  len     - number of bytes expected
 *
 *  Dies if the received byte count differs from len.
 *  Returns a pointer to the payload, i.e. (char*)(msginfo+1).
\*/
char *armci_ReadFromDirect (int proc, request_header_t *msginfo, int len)
{
    MPI_Status status;
    int received;
    int clus_id = armci_clus_id(proc);
    int server  = armci_clus_info[clus_id].master;

    armci_mpi2_debug(armci_me, "armci_ReadFromDirect: proc=%d, server=%d, "
                     "msginfo=%p, bytes=%d (op=%d)\n", proc, server, msginfo,
                     len, msginfo->operation);

    MPI_Check(
        MPI_Recv(msginfo + 1, len, MPI_BYTE, server,
                 ARMCI_MPI_SERVER2CLIENT_TAG, ARMCI_COMM_WORLD, &status)
    );

    armci_mpi2_debug(armci_me, "recv msg from server(%d), fwd by client %d\n",
                     server, proc);

    /* verify that the whole message arrived */
    MPI_Get_count(&status, MPI_BYTE, &received);
    if (received != len) {
        armci_mpi2_debug(armci_me, "armci_ReadFromDirect: got %d bytes, "
                         "expected %d bytes\n", received, len);
        armci_die("armci_ReadFromDirect: MPI_Recv failed.", received);
    }

    return (char *) (msginfo+1);
}
/*\ client sends strided data + request to server
 *
 *  proc       - target process; traffic goes to the master of its cluster
 *  msginfo    - request header, followed by a descriptor of dscrlen bytes
 *  ptr        - start of the strided data
 *  strides    - number of stride levels
 *  stride_arr - stride sizes
 *  count      - per-level counts
 *
 *  Always returns 0.
\*/
int armci_send_req_msg_strided(int proc, request_header_t *msginfo,char *ptr,
                               int strides, int stride_arr[], int count[])
{
    int clus_id = armci_clus_id(proc);
    int server  = armci_clus_info[clus_id].master;   /* Abhinav Vishnu */
    int hdr_bytes;

    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided: proc=%d server=%d "
                     "bytes=%d (op=%d)\n", proc, server, msginfo->datalen,
                     msginfo->operation);

    /* first the header + descriptor of strided data ... */
    hdr_bytes = sizeof(request_header_t) + msginfo->dscrlen;
    armci_send_req_msg(proc, msginfo, hdr_bytes);

    /* ... then the data itself, written directly thus avoiding memcopy */
    armci_mpi_strided_c2s(SEND, ptr, strides, stride_arr, count, server,
                          ARMCI_COMM_WORLD);

    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided(): send msg to "
                     "server(%d), to fwd to client %d\n", server, proc);

    return 0;
}
/*\ simple locking scheme that ignores addresses \*/ void armci_lockmem_(void *pstart, void *pend, int proc) { #ifdef BGML bgml_lockmem(pstart, pend, proc); #else #if defined(CLUSTER) && !defined(SGIALTIX) int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS; #else int lock = 0; #endif if(DEBUG_) { printf("%d: armci_lockmem_ proc=%d lock=%d\n",armci_me,proc,lock); fflush(stdout); } NATIVE_LOCK(lock,proc); # ifdef LAPI { extern int kevin_ok; kevin_ok=0; } # endif if(DEBUG_) { printf("%d: armci_lockmem_ done\n",armci_me); fflush(stdout); } #endif }
/*\ client receives strided data from server
 *
 *  proc       - process the request was addressed to; its cluster id is
 *               used as the server rank
 *  msginfo    - request header (only read for the debug message)
 *  ptr        - destination of the strided data
 *  strides    - number of stride levels
 *  stride_arr - stride sizes
 *  count      - per-level counts
 *
 *  Dies if the server rank is outside [0, armci_nserver).
\*/
void armci_ReadStridedFromDirect(int proc, request_header_t* msginfo,
                                 void *ptr, int strides, int stride_arr[],
                                 int count[])
{
    int server = armci_clus_id(proc);

    armci_mpi2_debug(armci_me, "armci_ReadStridedFromDirect: proc=%d "
                     "stride_levels=%d, server=%d bytes=%d (op=%d)\n",
                     proc, strides, server, msginfo->datalen,
                     msginfo->operation);

    if (server < 0 || server >= armci_nserver) {
        armci_die("armci_ReadStridedFromDirect: Invalid server.", 0);
    }

#ifdef MPI_USER_DEF_DATATYPE
    /* strided (non-contiguous) case goes through the datatype-based path */
    if (strides > 0) {
        armci_mpi_strided2(RECV, ptr, strides, stride_arr, count, server,
                           MPI_COMM_CLIENT2SERVER);
        return;
    }
#endif

    armci_mpi_strided(RECV, ptr, strides, stride_arr, count, server,
                      MPI_COMM_CLIENT2SERVER);
}
/*\ client sends strided data + request to server
 *
 *  proc       - target process; its cluster id selects the SRV_sock entry
 *  msginfo    - request header, followed by a descriptor of dscrlen bytes
 *  ptr        - start of the strided data
 *  strides    - number of stride levels
 *  stride_arr - stride sizes
 *  count      - per-level counts
 *
 *  The header and the data are written while holding
 *  armci_user_threads.net_lock.  Dies if writing the header fails.
 *  Always returns 0.
\*/
int armci_send_req_msg_strided(int proc, request_header_t *msginfo,char *ptr,
                               int strides, int stride_arr[], int count[])
{
    int cluster = armci_clus_id(proc);
    int hdr_bytes;
    int rc;

    if (DEBUG_) {
        printf("%d:armci_send_req_msg_strided: op=%d to=%d bytes= %d \n",armci_me,
               msginfo->operation,proc,msginfo->datalen);
        fflush(stdout);
    }

    /* we write header + data descriptor */
    hdr_bytes = sizeof(request_header_t) + msginfo->dscrlen;

    THREAD_LOCK(armci_user_threads.net_lock);

    rc = armci_WriteToSocket(SRV_sock[cluster], msginfo, hdr_bytes);
    if (rc < 0) {
        armci_die("armci_send_strided:write failed",rc);
    }

#if defined(USE_SOCKET_VECTOR_API)
    if (msginfo->operation==PUT && msginfo->datalen==0) {
        armci_SendStridedToSocket(SRV_sock[cluster], ptr, stride_arr, count,
                                  strides, (struct iovec *)(msginfo+1));
    } else
#endif
    {
        /* for larger blocks write directly to socket thus avoiding memcopy */
        armci_write_strided_sock(ptr, strides, stride_arr, count,
                                 SRV_sock[cluster]);
    }

    THREAD_UNLOCK(armci_user_threads.net_lock);

    return 0;
}
void PARMCI_Fence(int proc) { int i; #if defined(DATA_SERVER) && !(defined(GM) && defined(ACK_FENCE)) // printf("%d [cp] fence_arr(%d)=%d\n",armci_me,proc,FENCE_ARR(proc)); if(FENCE_ARR(proc) && (armci_nclus >1)){ int cluster = armci_clus_id(proc); int master=armci_clus_info[cluster].master; armci_rem_ack(cluster); /* one ack per cluster node suffices */ /* note, in multi-threaded case it will only clear for current thread */ bzero(&FENCE_ARR(master),armci_clus_info[cluster].nslave); } #elif defined(BGML) BGML_WaitProc(proc); MEM_FENCE; #else FENCE_NODE(proc); MEM_FENCE; #endif }
/*\ request an acknowledgement from the cluster of process "proc"
 *
 *  Nothing is done when SAMECLUSNODE(proc) holds or when there is only
 *  one cluster node.
\*/
void ARMCI_DoFence(int proc)
{
    if (SAMECLUSNODE(proc) || armci_nclus <= 1) {
        return;
    }

    armci_rem_ack(armci_clus_id(proc));
}
/*\ release lock in a callback function executed in context of processor "proc"
 *
 *  The lock index is the offset of proc from the master of its cluster,
 *  modulo NUM_LOCKS, when CLUSTER is defined and SGIALTIX is not;
 *  otherwise lock 0 is used.
\*/
void ARMCI_Gpc_unlock(int proc)
{
    int lock;

#if defined(CLUSTER) && !defined(SGIALTIX)
    lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    lock = 0;
#endif

    NATIVE_UNLOCK(lock,proc);
}
/* return domain ID of the specified process
 *
 * domain       - not consulted by this implementation
 * glob_proc_id - global process id; armci_die2 is called when it lies
 *                outside [0, armci_nproc)
 *
 * The result is the cluster id of glob_proc_id.
 */
int armci_domain_id(armci_domain_t domain, int glob_proc_id)
{
    int in_range = (glob_proc_id >= 0 && glob_proc_id < armci_nproc);

    if (!in_range) {
        armci_die2("armci domain error",glob_proc_id,armci_nproc);
    }

    return armci_clus_id(glob_proc_id);
}
/*\ client receives data from server over the socket of proc's cluster
 *
 *  proc    - process the request was addressed to
 *  msginfo - request header; the payload is stored right after it
 *  len     - number of bytes to read
 *
 *  Dies if the socket read reports a negative status.
 *  Returns a pointer to the payload, i.e. (char*)(msginfo+1).
\*/
char *armci_ReadFromDirect(int proc, request_header_t * msginfo, int len)
{
    int cluster = armci_clus_id(proc);
    int rc;

    if (DEBUG_) {
        printf("%d:armci_ReadFromDirect: from %d \n",armci_me,proc);
        fflush(stdout);
    }

    rc = armci_ReadFromSocket(SRV_sock[cluster], msginfo+1, len);
    if (rc < 0) {
        armci_die("armci_rcv_data: read failed",rc);
    }

    return (char*)(msginfo+1);
}
void armci_unlockmem_(int proc) { #if defined(CLUSTER) && !defined(SGIALTIX) int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS; #else int lock = 0; #endif if(DEBUG_){ printf("%d: armci_unlockmem_ proc=%d lock=%d\n",armci_me,proc,lock); fflush(stdout); } NATIVE_UNLOCK(lock,proc); }
void armci_generic_rmw(int op, void *ploc, void *prem, int extra, int proc) { #if defined(CLUSTER) && !defined(SGIALTIX) int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS; #else int lock = 0; #endif ARMCI_PR_DBG("enter",0); NATIVE_LOCK(lock,proc); switch (op) { case ARMCI_FETCH_AND_ADD: armci_get(prem,ploc,sizeof(int),proc); _a_temp = *(int*)ploc + extra; armci_put(&_a_temp,prem,sizeof(int),proc); break; case ARMCI_FETCH_AND_ADD_LONG: armci_get(prem,ploc,sizeof(long),proc); _a_ltemp = *(long*)ploc + extra; armci_put(&_a_ltemp,prem,sizeof(long),proc); break; case ARMCI_SWAP: #if (defined(__i386__) || defined(__x86_64__)) if(SERVER_CONTEXT || armci_nclus==1){ atomic_exchange(ploc, prem, sizeof(int)); } else #endif { armci_get(prem,&_a_temp,sizeof(int),proc); armci_put(ploc,prem,sizeof(int),proc); *(int*)ploc = _a_temp; } break; case ARMCI_SWAP_LONG: armci_get(prem,&_a_ltemp,sizeof(long),proc); armci_put(ploc,prem,sizeof(long),proc); *(long*)ploc = _a_ltemp; break; default: armci_die("rmw: operation not supported",op); } /*TODO memfence here*/ NATIVE_UNLOCK(lock,proc); ARMCI_PR_DBG("exit",0); }
/*\ client sends request message to server
 *
 *  proc  - target process; its cluster id selects the SRV_sock entry
 *  buf   - request buffer, starting with a request_header_t
 *  bytes - number of bytes to write
 *
 *  Runs under armci_user_threads.net_lock.
 *  Returns 1 if the socket write reported a negative status, else 0.
\*/
int armci_send_req_msg(int proc, void *buf, int bytes)
{
    request_header_t *hdr = (request_header_t*)buf;
    int cluster = armci_clus_id(proc);
    int slot;
    int failed;

    THREAD_LOCK(armci_user_threads.net_lock);

    /* mark sockets as active (only if reply is expected?) */
    slot = _armci_buf_to_index(hdr);
    _armci_active_socks->socks[slot] = SRV_sock[cluster];

    failed = (armci_WriteToSocket(SRV_sock[cluster], buf, bytes) < 0);

    THREAD_UNLOCK(armci_user_threads.net_lock);

    return failed;
}
/*\ client receives strided data from server
 *
 *  proc       - process the request was addressed to; its cluster id
 *               selects the SRV_sock entry
 *  msginfo    - request header; with USE_SOCKET_VECTOR_API the iovec scratch
 *               area is taken at (msginfo+1) + msginfo->dscrlen
 *  ptr        - destination of the strided data
 *  strides    - number of stride levels
 *  stride_arr - stride sizes
 *  count      - per-level counts
\*/
void armci_ReadStridedFromDirect(int proc, request_header_t* msginfo,
                                 void *ptr, int strides, int stride_arr[],
                                 int count[])
{
    int cluster = armci_clus_id(proc);

    if (DEBUG_) {
        printf("%d:armci_ReadStridedFromDirect: from %d \n",armci_me,proc);
        fflush(stdout);
    }

#if defined(USE_SOCKET_VECTOR_API)
    /* strided GET: receive through the vector API */
    if (msginfo->operation==GET && strides > 0) {
        armci_RecvStridedFromSocket(
            SRV_sock[cluster], ptr, stride_arr, count, strides,
            (struct iovec *)((char*)(msginfo+1)+msginfo->dscrlen));
        return;
    }
#endif

    armci_read_strided_sock(ptr, strides, stride_arr, count, SRV_sock[cluster]);
}
/*\ release the lock taken by the simple (address-ignoring) locking scheme
 *
 *  proc - process whose lock is released
 *
 *  Under BGML the call is delegated to bgml_unlockmem.  Under LAPI the
 *  external flag kevin_ok is set to 1 after the unlock.
\*/
void armci_unlockmem_(int proc)
{
#ifdef BGML
    bgml_unlockmem(proc);
#else
    int lock;

#  if defined(CLUSTER) && !defined(SGIALTIX)
    lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#  else
    lock = 0;
#  endif

    NATIVE_UNLOCK(lock,proc);

#  ifdef LAPI
    {
        extern int kevin_ok;
        kevin_ok=1;
    }
#  endif
#endif
}
/*\ client sends request message to server
 *
 *  proc  - target process; the message goes to the master of its cluster
 *  buf   - request buffer
 *  bytes - number of bytes to send
 *
 *  Always returns 0; the MPI_Send result is passed through MPI_Check.
\*/
int armci_send_req_msg (int proc, void *buf, int bytes)
{
    int clus_id = armci_clus_id(proc);
    int server ;

    /* Abhinav Vishnu */
    server = armci_clus_info[clus_id].master;

    armci_mpi2_debug(armci_me, "armci_send_req_msg(): proc=%d, server=%d, "
                     "buf=%p, bytes=%d\n", proc, server, buf, bytes);

    MPI_Check(
        MPI_Send(buf, bytes, MPI_BYTE, server, ARMCI_MPI_CLIENT2SERVER_TAG,
                 ARMCI_COMM_WORLD)
    );

    /* the literals are concatenated, so the first one must end in a space */
    armci_mpi2_debug(armci_me, "armci_send_req_msg(): send msg to server(%d), to "
                     "fwd to client %d\n", server, proc);

    return 0;
}
/*\ wait until the value read from this process' fence slot for p's cluster
 *  matches the number of operations recorded in ops_pending_ar
 *
 *  p - target process; only its cluster id is used
 *
 *  While the two differ the function spins via armci_util_spin, and calls
 *  usleep(1) on every 1000th iteration.
\*/
void armci_elan_fence(int p)
{
    long loop = 0;
    int cluster = armci_clus_id(p);
    ops_t *buf = armci_elan_fence_arr[armci_me] + cluster;

#if 0
    if(ops_pending_ar[cluster])
        printf("%d: client fencing proc=%d fence=%p slot %p pending=%d got=%d\n",
               armci_me, p, armci_elan_fence_arr[armci_me], buf,
               ops_pending_ar[cluster], armci_check_int_val(buf));
    fflush(stdout);
#endif

    for (;;) {
        long outstanding = ops_pending_ar[cluster] - armci_check_int_val(buf);

        if (!outstanding) {
            break;
        }

        /* back off briefly once per 1000 spins */
        if (++loop == 1000) {
            loop = 0;
            usleep(1);
        }
        armci_util_spin(loop, buf);
    }
}
/*\ client receives strided data from server
 *
 *  proc       - process the request was addressed to; the data comes from
 *               the master of its cluster
 *  msginfo    - request header (only read for the debug message)
 *  ptr        - destination of the strided data
 *  strides    - number of stride levels
 *  stride_arr - stride sizes
 *  count      - per-level counts
\*/
void armci_ReadStridedFromDirect(int proc, request_header_t* msginfo,
                                 void *ptr, int strides, int stride_arr[],
                                 int count[])
{
    int clus_id = armci_clus_id(proc);
    int server  = armci_clus_info[clus_id].master;   /* Abhinav Vishnu */

    armci_mpi2_debug(armci_me, "armci_ReadStridedFromDirect: proc=%d "
                     "stride_levels=%d, server=%d bytes=%d (op=%d)\n",
                     proc, strides, server, msginfo->datalen,
                     msginfo->operation);

    armci_mpi_strided_c2s(RECV, ptr, strides, stride_arr, count, server,
                          ARMCI_COMM_WORLD);
}
/*\ client sends request message to server \*/ int armci_send_req_msg (int proc, void *buf, int bytes) { int server = armci_clus_id(proc); armci_mpi2_debug(armci_me, "armci_send_req_msg(): proc=%d, server=%d, " "buf=%p, bytes=%d\n", proc, server, buf, bytes); if( !(server >= 0 && server < armci_nserver) ) armci_die("armci_send_req_msg: Invalid server.", 0); #ifdef MULTIPLE_BUFS /** * Sequentially ordered tags to ensure flow control at the server side. * For example, a put followed by get from a client should be processed in * ORDER at the server side. If we don't have the flow control, the server * might process the get request first instead of put (and thus violating * ARMCI's ordering semantics. */ ((request_header_t*)buf)->tag = _armci_mpi_tag[server]; MPI_Check( MPI_Send(buf, bytes, MPI_BYTE, server, ARMCI_MPI_SPAWN_TAG, MPI_COMM_CLIENT2SERVER) ); _armci_mpi_tag[server]++; if(_armci_mpi_tag[server] > ARMCI_MPI_SPAWN_TAG_END) _armci_mpi_tag[server] = ARMCI_MPI_SPAWN_TAG_BEGIN; #else MPI_Check( MPI_Send(buf, bytes, MPI_BYTE, server, ARMCI_MPI_SPAWN_TAG, MPI_COMM_CLIENT2SERVER) ); #endif armci_mpi2_debug(armci_me, "armci_send_req_msg(): send msg to server(%d), to" "fwd to client %d\n", server, proc); return 0; }
/*\ wait until outstanding operations targeting process "proc" are complete
 *
 *  With a data server (and not GM with ACK_FENCE): if the fence flag of proc
 *  is set and there is more than one cluster node, an acknowledgement is
 *  requested from proc's cluster and nslave bytes of fence flags, starting
 *  at the cluster master's entry, are cleared.
 *  ARMCIX: delegated to ARMCIX_Fence.
 *  BGML: BGML_WaitProc followed by MEM_FENCE.
 *  Otherwise: FENCE_NODE followed by MEM_FENCE.
\*/
void PARMCI_Fence(int proc)
{
#if defined(DATA_SERVER) && !(defined(GM) && defined(ACK_FENCE))
    if (FENCE_ARR(proc) && (armci_nclus > 1)) {
        int clus  = armci_clus_id(proc);
        int first = armci_clus_info[clus].master;

        armci_rem_ack(clus);
        bzero(&FENCE_ARR(first), armci_clus_info[clus].nslave);
    }
#elif defined(ARMCIX)
    ARMCIX_Fence (proc);
#elif defined(BGML)
    BGML_WaitProc(proc);
    MEM_FENCE;
#else
    FENCE_NODE(proc);
    MEM_FENCE;
#endif
}
/*\ client receives data from server
 *
 *  proc    - process the request was addressed to; its cluster id is used
 *            as the server rank
 *  msginfo - request header; the payload is stored right after it
 *  len     - number of bytes expected
 *
 *  Dies if the server rank is outside [0, armci_nserver).  When
 *  MPI_SPAWN_DEBUG is enabled, also dies if the received byte count
 *  differs from len.
 *  Returns a pointer to the payload, i.e. (char*)(msginfo+1).
\*/
char *armci_ReadFromDirect (int proc, request_header_t *msginfo, int len)
{
    MPI_Status status;
    int server = armci_clus_id(proc);

    armci_mpi2_debug(armci_me, "armci_ReadFromDirect: proc=%d, server=%d, "
                     "msginfo=%p, bytes=%d (op=%d)\n", proc, server, msginfo,
                     len, msginfo->operation);

    if (server < 0 || server >= armci_nserver) {
        armci_die("armci_ReadFromDirect: Invalid server.", 0);
    }

    MPI_Check(
        MPI_Recv(msginfo + 1, len, MPI_BYTE, server, ARMCI_MPI_SPAWN_TAG,
                 MPI_COMM_CLIENT2SERVER, &status)
    );

    armci_mpi2_debug(armci_me, "recv msg from server(%d), fwd by client %d\n",
                     server, proc);

#if MPI_SPAWN_DEBUG
    {
        int received;

        MPI_Get_count(&status, MPI_BYTE, &received);
        if (received != len) {
            armci_mpi2_debug(armci_me, "armci_ReadFromDirect: got %d bytes, "
                             "expected %d bytes\n", received, len);
            armci_die("armci_ReadFromDirect: MPI_Recv failed.", received);
        }
    }
#endif

    return (char *) (msginfo+1);
}
/*\ client sends strided data + request to server
 *
 *  proc       - target process; its cluster id is used as the server rank
 *  msginfo    - request header, followed by a descriptor of dscrlen bytes
 *  ptr        - start of the strided data
 *  strides    - number of stride levels
 *  stride_arr - stride sizes
 *  count      - per-level counts
 *
 *  Request and data are sent while holding armci_user_threads.net_lock.
 *  Always returns 0.
\*/
int armci_send_req_msg_strided(int proc, request_header_t *msginfo,char *ptr,
                               int strides, int stride_arr[], int count[])
{
    int server = armci_clus_id(proc);
    int hdr_bytes;

    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided: proc=%d server=%d "
                     "bytes=%d (op=%d)\n", proc, server, msginfo->datalen,
                     msginfo->operation);

    THREAD_LOCK(armci_user_threads.net_lock);

    /* we write header + descriptor of strided data */
    hdr_bytes = sizeof(request_header_t) + msginfo->dscrlen;
    armci_send_req_msg(proc, msginfo, hdr_bytes);

#ifdef MPI_USER_DEF_DATATYPE
    if (strides > 0) {
        armci_mpi_strided2(SEND, ptr, strides, stride_arr, count, server,
                           MPI_COMM_CLIENT2SERVER);
    } else
#endif
    {
        /* for larger blocks write directly thus avoiding memcopy */
        armci_mpi_strided(SEND, ptr, strides, stride_arr, count, server,
                          MPI_COMM_CLIENT2SERVER);
    }

    THREAD_UNLOCK(armci_user_threads.net_lock);

    armci_mpi2_debug(armci_me, "armci_send_req_msg_strided(): send msg to "
                     "server(%d), to fwd to client %d\n", server, proc);

    return 0;
}
/*\ acquire exclusive LOCK to MEMORY area <pstart,pend> owned by process "proc" * . only one area can be locked at a time by the calling process * . must unlock it with armci_unlockmem \*/ void armci_lockmem(void *start, void *end, int proc) { #ifdef ARMCIX ARMCIX_Lockmem (start, end, proc); #else register void* pstart, *pend; register int slot, avail=0; int turn=0, conflict=0; memlock_t *memlock_table; #if defined(CLUSTER) && !defined(SGIALTIX) int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS; #else int lock = 0; #endif #ifdef CORRECT_PTR if(! *armci_use_memlock_table) { /* if offset invalid, use dumb locking scheme ignoring addresses */ armci_lockmem_(start, end, proc); return; } # ifndef SGIALTIX /* when processes are attached to a shmem region at different addresses, * addresses written to memlock table must be adjusted to the node master */ if(armci_mem_offset) { start = armci_mem_offset + (char*)start; end = armci_mem_offset + (char*)end; } # endif #endif if(DEBUG_) { printf("%d: calling armci_lockmem for %d range %p -%p\n", armci_me, proc, start,end); fflush(stdout); } memlock_table = (memlock_t*)memlock_table_array[proc]; #ifdef ALIGN_ADDRESS /* align address range on cache line boundary to avoid false sharing */ pstart = ALIGN_ADDRESS(start); pend = CALGN -1 + ALIGN_ADDRESS(end); #else pstart=start; pend =end; #endif #ifdef CRAY_SHMEM { /* adjust according the remote process raw address */ long bytes = (long) ((char*)pend-(char*)pstart); extern void* armci_shmalloc_remote_addr(void *ptr, int proc); pstart = armci_shmalloc_remote_addr(pstart, proc); pend = (char*)pstart + bytes; } #endif #ifdef SGIALTIX if (proc == armci_me) { pstart = shmem_ptr(pstart,armci_me); pend = shmem_ptr(pend,armci_me); } /* In SGI Altix processes are attached to a shmem region at different addresses. 
Addresses written to memlock table must be adjusted to the node master */ if(ARMCI_Uses_shm()) { int i, seg_id=-1; size_t tile_size,offset; void *start_addr, *end_addr; for(i=0; i<seg_count; i++) { tile_size = armci_memoffset_table[i].tile_size; start_addr = (void*) ((char*)armci_memoffset_table[i].seg_addr + proc*tile_size); end_addr = (void*) ((char*)start_addr + armci_memoffset_table[i].seg_size); /* CHECK: because of too much "span" in armci_lockmem_patch in * strided.c, it is not possible to have condition as (commented):*/ /*if(pstart>=start_addr && pend<=end_addr) {seg_id=i; break;}*/ if(pstart >= start_addr && pstart <= end_addr) { seg_id=i; break; } } if(seg_id==-1) armci_die("armci_lockmem: Invalid segment", seg_id); offset = armci_memoffset_table[seg_id].mem_offset; pstart = ((char*)pstart + offset); pend = ((char*)pend + offset); } #endif while(1) { NATIVE_LOCK(lock,proc); armci_get(memlock_table, table, sizeof(table), proc); /* armci_copy(memlock_table, table, sizeof(table));*/ /* inspect the table */ conflict = 0; avail =-1; for(slot = 0; slot < MAX_SLOTS; slot ++) { /* nonzero starting address means the slot is occupied */ if(table[slot].start == NULL) { /* remember a free slot to store address range */ avail = slot; } else { /*check for conflict: overlap between stored and current range*/ if( (pstart >= table[slot].start && pstart <= table[slot].end) || (pend >= table[slot].start && pend <= table[slot].end) ) { conflict = 1; break; } /* printf("%d: locking %ld-%ld (%d) conflict\n", armci_me, */ } } if(avail != -1 && !conflict) break; NATIVE_UNLOCK(lock,proc); armci_waitsome( ++turn ); } /* we got the memory lock: enter address into the table */ table[avail].start = pstart; table[avail].end = pend; armci_put(table+avail,memlock_table+avail,sizeof(memlock_t),proc); FENCE_NODE(proc); NATIVE_UNLOCK(lock,proc); locked_slot = avail; #endif /* ! ARMCIX */ }
/*\ acquire exclusive LOCK to MEMORY area <pstart,pend> owned by process "proc" * . only one area can be locked at a time by the calling process * . must unlock it with armci_unlockmem \*/ void armci_lockmem(void *start, void *end, int proc) { register void* pstart, *pend; register int slot, avail=0; int turn=0, conflict=0; memlock_t *memlock_table; #if defined(CLUSTER) && !defined(SGIALTIX) int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS; #else int lock = 0; #endif #ifdef CORRECT_PTR if(! *armci_use_memlock_table){ /* if offset invalid, use dumb locking scheme ignoring addresses */ armci_lockmem_(start, end, proc); return; } # ifndef SGIALTIX /* when processes are attached to a shmem region at different addresses, * addresses written to memlock table must be adjusted to the node master */ if(armci_mem_offset){ start = armci_mem_offset + (char*)start; end = armci_mem_offset + (char*)end; } # endif #endif if(DEBUG_){ printf("%d: calling armci_lockmem for %d range %p -%p\n", armci_me, proc, start,end); fflush(stdout); } memlock_table = (memlock_t*)memlock_table_array[proc]; #ifdef ALIGN_ADDRESS /* align address range on cache line boundary to avoid false sharing */ pstart = ALIGN_ADDRESS(start); pend = CALGN -1 + ALIGN_ADDRESS(end); #else pstart=start; pend =end; #endif #ifdef CRAY_SHMEM { /* adjust according the remote process raw address */ long bytes = (long) ((char*)pend-(char*)pstart); extern void* armci_shmalloc_remote_addr(void *ptr, int proc); pstart = armci_shmalloc_remote_addr(pstart, proc); pend = (char*)pstart + bytes; } #endif while(1){ NATIVE_LOCK(lock,proc); armci_get(memlock_table, table, sizeof(table), proc); /* armci_copy(memlock_table, table, sizeof(table));*/ /* inspect the table */ conflict = 0; avail =-1; for(slot = 0; slot < MAX_SLOTS; slot ++){ /* nonzero starting address means the slot is occupied */ if(table[slot].start == NULL){ /* remember a free slot to store address range */ avail = slot; }else{ /*check for 
conflict: overlap between stored and current range*/ if( (pstart >= table[slot].start && pstart <= table[slot].end) || (pend >= table[slot].start && pend <= table[slot].end) ){ conflict = 1; break; } /* printf("%d: locking %ld-%ld (%d) conflict\n", armci_me, */ } } if(avail != -1 && !conflict) break; NATIVE_UNLOCK(lock,proc); armci_waitsome( ++turn ); } /* we got the memory lock: enter address into the table */ table[avail].start = pstart; table[avail].end = pend; armci_put(table+avail,memlock_table+avail,sizeof(memlock_t),proc); FENCE_NODE(proc); NATIVE_UNLOCK(lock,proc); locked_slot = avail; }
/*\ client receives vector data directly from the server socket
 *
 *  msginfo - request header; the iovec scratch area is taken at
 *            (msginfo+1) + msginfo->dscrlen
 *  darr    - array of len vector descriptors
 *  proc    - process the request was addressed to; its cluster id selects
 *            the SRV_sock entry
 *
 *  Returns whatever armci_RecvVectorFromSocket returns.
\*/
int armci_direct_vector_get(request_header_t *msginfo , armci_giov_t darr[],
                            int len, int proc)
{
    int cluster = armci_clus_id(proc);
    char *after_header = (char*)(msginfo+1);
    struct iovec *iov = (struct iovec *)(after_header + msginfo->dscrlen);

    return armci_RecvVectorFromSocket(SRV_sock[cluster], darr, len, iov);
}