/*\ release lock in a callback function executed in context of processor "proc"
\*/
void ARMCI_Gpc_unlock(int proc)
{
#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

    NATIVE_UNLOCK(lock,proc);
}
void armci_unlockmem_(int proc)
{
#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

    if(DEBUG_){
       printf("%d: armci_unlockmem_ proc=%d lock=%d\n",armci_me,proc,lock);
       fflush(stdout);
    }
    NATIVE_UNLOCK(lock,proc);
}
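/* Note on the lock index used throughout this file: it is not per-process.
 * Within a node the caller's offset from the node master is folded onto a
 * fixed pool of NUM_LOCKS locks, so different processes can map to the same
 * lock.  A standalone sketch of the mapping, with a hypothetical master rank
 * and pool size (illustration only, not library code): */

#include <stdio.h>

#define POOL_SIZE 4                 /* stands in for NUM_LOCKS */

int main(void)
{
    int master = 8;                 /* hypothetical rank of the node master */
    int proc;

    /* ranks 8..15 live on this node; show which lock each one maps to */
    for(proc = master; proc < master+8; proc++)
        printf("proc %d -> lock %d\n", proc, (proc-master)%POOL_SIZE);
    return 0;
}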
void armci_generic_rmw(int op, void *ploc, void *prem, int extra, int proc)
{
#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

    ARMCI_PR_DBG("enter",0);
    NATIVE_LOCK(lock,proc);

    switch (op) {
      case ARMCI_FETCH_AND_ADD:
           armci_get(prem,ploc,sizeof(int),proc);
           _a_temp = *(int*)ploc + extra;
           armci_put(&_a_temp,prem,sizeof(int),proc);
           break;
      case ARMCI_FETCH_AND_ADD_LONG:
           armci_get(prem,ploc,sizeof(long),proc);
           _a_ltemp = *(long*)ploc + extra;
           armci_put(&_a_ltemp,prem,sizeof(long),proc);
           break;
      case ARMCI_SWAP:
#if (defined(__i386__) || defined(__x86_64__))
           if(SERVER_CONTEXT || armci_nclus==1){
              atomic_exchange(ploc, prem, sizeof(int));
           }
           else
#endif
           {
              armci_get(prem,&_a_temp,sizeof(int),proc);
              armci_put(ploc,prem,sizeof(int),proc);
              *(int*)ploc = _a_temp;
           }
           break;
      case ARMCI_SWAP_LONG:
           armci_get(prem,&_a_ltemp,sizeof(long),proc);
           armci_put(ploc,prem,sizeof(long),proc);
           *(long*)ploc = _a_ltemp;
           break;
      default:
           armci_die("rmw: operation not supported",op);
    }
    /* TODO: a memory fence is needed here */
    NATIVE_UNLOCK(lock,proc);
    ARMCI_PR_DBG("exit",0);
}
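/* For context, armci_generic_rmw is the lock-based fallback behind ARMCI's
 * public read-modify-write call.  A minimal usage sketch, assuming the
 * standard ARMCI_Rmw entry point and omitting error handling: */

#include "armci.h"

/* atomically add 1 to an integer counter living on process "proc" and
 * return the value the counter had before the increment */
int bump_remote_counter(int *counter_on_proc, int proc)
{
    int old_value;
    ARMCI_Rmw(ARMCI_FETCH_AND_ADD, &old_value, counter_on_proc, 1, proc);
    return old_value;
}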
void armci_unlockmem_(int proc)
{
#ifdef BGML
    bgml_unlockmem(proc);
#else

#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

    NATIVE_UNLOCK(lock,proc);
#   ifdef LAPI
    {
       extern int kevin_ok;
       kevin_ok=1;
    }
#   endif
#endif
}
/*\ acquire exclusive LOCK to MEMORY area <pstart,pend> owned by process "proc"
 *  . only one area can be locked at a time by the calling process
 *  . must unlock it with armci_unlockmem
\*/
void armci_lockmem(void *start, void *end, int proc)
{
    register void* pstart, *pend;
    register int slot, avail=0;
    int turn=0, conflict=0;
    memlock_t *memlock_table;
#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

#ifdef CORRECT_PTR
    if(! *armci_use_memlock_table){
       /* if offset invalid, use dumb locking scheme ignoring addresses */
       armci_lockmem_(start, end, proc);
       return;
    }

#  ifndef SGIALTIX
    /* when processes are attached to a shmem region at different addresses,
     * addresses written to the memlock table must be adjusted to the node master */
    if(armci_mem_offset){
       start = armci_mem_offset + (char*)start;
       end   = armci_mem_offset + (char*)end;
    }
#  endif
#endif

    if(DEBUG_){
       printf("%d: calling armci_lockmem for %d range %p -%p\n",
              armci_me, proc, start, end);
       fflush(stdout);
    }
    memlock_table = (memlock_t*)memlock_table_array[proc];

#ifdef ALIGN_ADDRESS
    /* align address range on cache line boundary to avoid false sharing */
    pstart = ALIGN_ADDRESS(start);
    pend   = CALGN -1 + ALIGN_ADDRESS(end);
#else
    pstart = start;
    pend   = end;
#endif

#ifdef CRAY_SHMEM
    { /* adjust to the raw address as seen by the remote process */
       long bytes = (long) ((char*)pend-(char*)pstart);
       extern void* armci_shmalloc_remote_addr(void *ptr, int proc);
       pstart = armci_shmalloc_remote_addr(pstart, proc);
       pend   = (char*)pstart + bytes;
    }
#endif

    while(1){
       NATIVE_LOCK(lock,proc);

       armci_get(memlock_table, table, sizeof(table), proc);
/*     armci_copy(memlock_table, table, sizeof(table));*/

       /* inspect the table */
       conflict = 0; avail = -1;
       for(slot = 0; slot < MAX_SLOTS; slot++){

          /* a NULL starting address means the slot is free */
          if(table[slot].start == NULL){
             /* remember a free slot to store our address range */
             avail = slot;
          }else{
             /* check for conflict: overlap between stored and current range */
             if( (pstart >= table[slot].start && pstart <= table[slot].end) ||
                 (pend   >= table[slot].start && pend   <= table[slot].end) ){
                 conflict = 1;
                 break;
             }
          }
       }

       if(avail != -1 && !conflict) break;

       NATIVE_UNLOCK(lock,proc);
       armci_waitsome( ++turn );
    }

    /* we got the memory lock: enter address into the table */
    table[avail].start = pstart;
    table[avail].end   = pend;
    armci_put(table+avail, memlock_table+avail, sizeof(memlock_t), proc);

    FENCE_NODE(proc);
    NATIVE_UNLOCK(lock,proc);
    locked_slot = avail;
}
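/* As the header comment states, a process may hold only one locked range at
 * a time and must release it with armci_unlockmem.  A hedged sketch of the
 * intended pairing around a remote update, reusing the internal armci_put
 * call seen above (illustration only, not library code): */

/* overwrite n doubles at rbuf on process "proc" under the memory lock */
void update_remote_block(double *rbuf, double *lbuf, int n, int proc)
{
    armci_lockmem(rbuf, rbuf+n-1, proc);        /* lock range <start,end> */
    armci_put(lbuf, rbuf, n*sizeof(double), proc);
    armci_unlockmem(proc);                      /* release the held range */
}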
void test()
{
    double *a, start=1., end=-1.;
    int len=100;
    long size = len*sizeof(double);
    long idlist[SHMIDLEN];
    int numlock=10, i;
    lockset_t lockid;

    /* shared memory test */
    if(me==0){
       printf("Test shared memory\n");
       a = (double*)Create_Shared_Region(idlist+1, size, idlist);
       assert(a);
       a[0] = start; a[len-1] = end;
    }
    MPI_Bcast(idlist, SHMIDLEN, MPI_LONG, 0, MPI_COMM_WORLD);
    if(me){
       a = (double*)Attach_Shared_Region(idlist+1, size, idlist[0]);
       assert(a);
    }
    if(me==nproc-1){
       printf("%d: start=%f end=%f\n", me, a[0], a[len-1]);
       if(a[0]==start && a[len-1]==end) printf("Works!\n");
    }
/*  printf("%d: a=%x\n",me,a); */
    MPI_Barrier(MPI_COMM_WORLD);

    /* allocate locks */
    if(me==0){
       a[0] = 0.;
       CreateInitLocks(numlock, &lockid);
       printf("\nMutual exclusion test\n");
    }
    MPI_Bcast(&lockid, sizeof(lockid), MPI_BYTE, 0, MPI_COMM_WORLD);
    if(me) InitLocks(numlock, lockid);

    /* mutual exclusion test:
     * everybody increments the shared variable 1000 times */
#   define TIMES 1000
    MPI_Barrier(MPI_COMM_WORLD);

    for(i=0; i<TIMES; i++){
       NATIVE_LOCK(0);
       a[0]++;
       NATIVE_UNLOCK(0);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    if(me==nproc-1){
       printf("value of shared variable =%f should be %f\n",
              a[0], 1.0*nproc*TIMES);
       if(a[0]==1.0*nproc*TIMES) printf("Works!\n\n");
    }

    /* cleanup of IPC resources */
    if(me==0){
       DeleteLocks(lockid);
       Delete_All_Regions();
    }
    MPI_Barrier(MPI_COMM_WORLD);
}
/*\ acquire exclusive LOCK to MEMORY area <pstart,pend> owned by process "proc"
 *  . only one area can be locked at a time by the calling process
 *  . must unlock it with armci_unlockmem
\*/
void armci_lockmem(void *start, void *end, int proc)
{
#ifdef ARMCIX
    ARMCIX_Lockmem (start, end, proc);
#else
    register void* pstart, *pend;
    register int slot, avail=0;
    int turn=0, conflict=0;
    memlock_t *memlock_table;
#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

#ifdef CORRECT_PTR
    if(! *armci_use_memlock_table){
       /* if offset invalid, use dumb locking scheme ignoring addresses */
       armci_lockmem_(start, end, proc);
       return;
    }

#  ifndef SGIALTIX
    /* when processes are attached to a shmem region at different addresses,
     * addresses written to the memlock table must be adjusted to the node master */
    if(armci_mem_offset){
       start = armci_mem_offset + (char*)start;
       end   = armci_mem_offset + (char*)end;
    }
#  endif
#endif

    if(DEBUG_){
       printf("%d: calling armci_lockmem for %d range %p -%p\n",
              armci_me, proc, start, end);
       fflush(stdout);
    }
    memlock_table = (memlock_t*)memlock_table_array[proc];

#ifdef ALIGN_ADDRESS
    /* align address range on cache line boundary to avoid false sharing */
    pstart = ALIGN_ADDRESS(start);
    pend   = CALGN -1 + ALIGN_ADDRESS(end);
#else
    pstart = start;
    pend   = end;
#endif

#ifdef CRAY_SHMEM
    { /* adjust to the raw address as seen by the remote process */
       long bytes = (long) ((char*)pend-(char*)pstart);
       extern void* armci_shmalloc_remote_addr(void *ptr, int proc);
       pstart = armci_shmalloc_remote_addr(pstart, proc);
       pend   = (char*)pstart + bytes;
    }
#endif

#ifdef SGIALTIX
    if(proc == armci_me){
       pstart = shmem_ptr(pstart, armci_me);
       pend   = shmem_ptr(pend, armci_me);
    }
    /* on SGI Altix processes are attached to a shmem region at different
     * addresses; addresses written to the memlock table must be adjusted
     * to the node master */
    if(ARMCI_Uses_shm()){
       int i, seg_id=-1;
       size_t tile_size, offset;
       void *start_addr, *end_addr;

       for(i=0; i<seg_count; i++){
          tile_size  = armci_memoffset_table[i].tile_size;
          start_addr = (void*)((char*)armci_memoffset_table[i].seg_addr +
                               proc*tile_size);
          end_addr   = (void*)((char*)start_addr +
                               armci_memoffset_table[i].seg_size);
          /* CHECK: armci_lockmem_patch in strided.c can span too wide a
           * range, so the stricter test below cannot be used: */
          /* if(pstart>=start_addr && pend<=end_addr) {seg_id=i; break;} */
          if(pstart >= start_addr && pstart <= end_addr){
             seg_id = i;
             break;
          }
       }
       if(seg_id==-1) armci_die("armci_lockmem: Invalid segment", seg_id);

       offset = armci_memoffset_table[seg_id].mem_offset;
       pstart = ((char*)pstart + offset);
       pend   = ((char*)pend + offset);
    }
#endif

    while(1){
       NATIVE_LOCK(lock,proc);

       armci_get(memlock_table, table, sizeof(table), proc);
/*     armci_copy(memlock_table, table, sizeof(table));*/

       /* inspect the table */
       conflict = 0; avail = -1;
       for(slot = 0; slot < MAX_SLOTS; slot++){

          /* a NULL starting address means the slot is free */
          if(table[slot].start == NULL){
             /* remember a free slot to store our address range */
             avail = slot;
          }else{
             /* check for conflict: overlap between stored and current range */
             if( (pstart >= table[slot].start && pstart <= table[slot].end) ||
                 (pend   >= table[slot].start && pend   <= table[slot].end) ){
                 conflict = 1;
                 break;
             }
          }
       }

       if(avail != -1 && !conflict) break;

       NATIVE_UNLOCK(lock,proc);
       armci_waitsome( ++turn );
    }

    /* we got the memory lock: enter address into the table */
    table[avail].start = pstart;
    table[avail].end   = pend;
    armci_put(table+avail, memlock_table+avail, sizeof(memlock_t), proc);

    FENCE_NODE(proc);
    NATIVE_UNLOCK(lock,proc);
    locked_slot = avail;
#endif /* ! ARMCIX */
}
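/* The while-loop above does two things in a single pass over the table: it
 * remembers a free slot and bails out on any overlap with a range another
 * process already holds.  A self-contained sketch of that scan; the
 * MAX_SLOTS value and struct layout here are assumptions made for
 * illustration only: */

#include <stddef.h>

#define MAX_SLOTS 8                 /* assumed table size for this sketch */

typedef struct { void *start; void *end; } memlock_t;

/* return a free slot index if <pstart,pend> overlaps no occupied slot;
 * return -1 on conflict or full table (caller backs off and retries) */
static int scan_table(memlock_t table[MAX_SLOTS], void *pstart, void *pend)
{
    int slot, avail = -1;

    for(slot = 0; slot < MAX_SLOTS; slot++){
       if(table[slot].start == NULL){
          avail = slot;             /* remember a free slot */
       }else if( (pstart >= table[slot].start && pstart <= table[slot].end) ||
                 (pend   >= table[slot].start && pend   <= table[slot].end) ){
          return -1;                /* overlap with a held range */
       }
    }
    return avail;
}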