void armci_generic_rmw(int op, void *ploc, void *prem, int extra, int proc) { #if defined(CLUSTER) && !defined(SGIALTIX) int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS; #else int lock = 0; #endif ARMCI_PR_DBG("enter",0); NATIVE_LOCK(lock,proc); switch (op) { case ARMCI_FETCH_AND_ADD: armci_get(prem,ploc,sizeof(int),proc); _a_temp = *(int*)ploc + extra; armci_put(&_a_temp,prem,sizeof(int),proc); break; case ARMCI_FETCH_AND_ADD_LONG: armci_get(prem,ploc,sizeof(long),proc); _a_ltemp = *(long*)ploc + extra; armci_put(&_a_ltemp,prem,sizeof(long),proc); break; case ARMCI_SWAP: #if (defined(__i386__) || defined(__x86_64__)) if(SERVER_CONTEXT || armci_nclus==1){ atomic_exchange(ploc, prem, sizeof(int)); } else #endif { armci_get(prem,&_a_temp,sizeof(int),proc); armci_put(ploc,prem,sizeof(int),proc); *(int*)ploc = _a_temp; } break; case ARMCI_SWAP_LONG: armci_get(prem,&_a_ltemp,sizeof(long),proc); armci_put(ploc,prem,sizeof(long),proc); *(long*)ploc = _a_ltemp; break; default: armci_die("rmw: operation not supported",op); } /*TODO memfence here*/ NATIVE_UNLOCK(lock,proc); ARMCI_PR_DBG("exit",0); }
int armci_copy_vector(int op, /* operation code */ armci_giov_t darr[], /* descriptor array */ int len, /* length of descriptor array */ int proc /* remote process(or) ID */ ) { int i,s,shmem= SAMECLUSNODE(proc); int armci_th_idx = ARMCI_THREAD_IDX; if(shmem){ /* local/shared memory copy */ for(i = 0; i< len; i++){ for( s=0; s< darr[i].ptr_array_len; s++){ armci_copy(darr[i].src_ptr_array[s],darr[i].dst_ptr_array[s],darr[i].bytes); } } }else { switch(op){ case PUT: for(i = 0; i< len; i++){ UPDATE_FENCE_STATE(proc, PUT, darr[i].ptr_array_len); for( s=0; s< darr[i].ptr_array_len; s++){ armci_put(darr[i].src_ptr_array[s],darr[i].dst_ptr_array[s], darr[i].bytes, proc); } } break; case GET: for(i = 0; i< len; i++){ for( s=0; s< darr[i].ptr_array_len; s++){ armci_get(darr[i].src_ptr_array[s],darr[i].dst_ptr_array[s], darr[i].bytes,proc); } } break; default: armci_die("armci_copy_vector: wrong optype",op); } } return 0; }
/*\ acquire exclusive LOCK to MEMORY area <pstart,pend> owned by process "proc" * . only one area can be locked at a time by the calling process * . must unlock it with armci_unlockmem \*/ void armci_lockmem(void *start, void *end, int proc) { register void* pstart, *pend; register int slot, avail=0; int turn=0, conflict=0; memlock_t *memlock_table; #if defined(CLUSTER) && !defined(SGIALTIX) int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS; #else int lock = 0; #endif #ifdef CORRECT_PTR if(! *armci_use_memlock_table){ /* if offset invalid, use dumb locking scheme ignoring addresses */ armci_lockmem_(start, end, proc); return; } # ifndef SGIALTIX /* when processes are attached to a shmem region at different addresses, * addresses written to memlock table must be adjusted to the node master */ if(armci_mem_offset){ start = armci_mem_offset + (char*)start; end = armci_mem_offset + (char*)end; } # endif #endif if(DEBUG_){ printf("%d: calling armci_lockmem for %d range %p -%p\n", armci_me, proc, start,end); fflush(stdout); } memlock_table = (memlock_t*)memlock_table_array[proc]; #ifdef ALIGN_ADDRESS /* align address range on cache line boundary to avoid false sharing */ pstart = ALIGN_ADDRESS(start); pend = CALGN -1 + ALIGN_ADDRESS(end); #else pstart=start; pend =end; #endif #ifdef CRAY_SHMEM { /* adjust according the remote process raw address */ long bytes = (long) ((char*)pend-(char*)pstart); extern void* armci_shmalloc_remote_addr(void *ptr, int proc); pstart = armci_shmalloc_remote_addr(pstart, proc); pend = (char*)pstart + bytes; } #endif while(1){ NATIVE_LOCK(lock,proc); armci_get(memlock_table, table, sizeof(table), proc); /* armci_copy(memlock_table, table, sizeof(table));*/ /* inspect the table */ conflict = 0; avail =-1; for(slot = 0; slot < MAX_SLOTS; slot ++){ /* nonzero starting address means the slot is occupied */ if(table[slot].start == NULL){ /* remember a free slot to store address range */ avail = slot; }else{ /*check for conflict: overlap between stored and current range*/ if( (pstart >= table[slot].start && pstart <= table[slot].end) || (pend >= table[slot].start && pend <= table[slot].end) ){ conflict = 1; break; } /* printf("%d: locking %ld-%ld (%d) conflict\n", armci_me, */ } } if(avail != -1 && !conflict) break; NATIVE_UNLOCK(lock,proc); armci_waitsome( ++turn ); } /* we got the memory lock: enter address into the table */ table[avail].start = pstart; table[avail].end = pend; armci_put(table+avail,memlock_table+avail,sizeof(memlock_t),proc); FENCE_NODE(proc); NATIVE_UNLOCK(lock,proc); locked_slot = avail; }
/*\ acquire exclusive LOCK to MEMORY area <pstart,pend> owned by process "proc" * . only one area can be locked at a time by the calling process * . must unlock it with armci_unlockmem \*/ void armci_lockmem(void *start, void *end, int proc) { #ifdef ARMCIX ARMCIX_Lockmem (start, end, proc); #else register void* pstart, *pend; register int slot, avail=0; int turn=0, conflict=0; memlock_t *memlock_table; #if defined(CLUSTER) && !defined(SGIALTIX) int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS; #else int lock = 0; #endif #ifdef CORRECT_PTR if(! *armci_use_memlock_table) { /* if offset invalid, use dumb locking scheme ignoring addresses */ armci_lockmem_(start, end, proc); return; } # ifndef SGIALTIX /* when processes are attached to a shmem region at different addresses, * addresses written to memlock table must be adjusted to the node master */ if(armci_mem_offset) { start = armci_mem_offset + (char*)start; end = armci_mem_offset + (char*)end; } # endif #endif if(DEBUG_) { printf("%d: calling armci_lockmem for %d range %p -%p\n", armci_me, proc, start,end); fflush(stdout); } memlock_table = (memlock_t*)memlock_table_array[proc]; #ifdef ALIGN_ADDRESS /* align address range on cache line boundary to avoid false sharing */ pstart = ALIGN_ADDRESS(start); pend = CALGN -1 + ALIGN_ADDRESS(end); #else pstart=start; pend =end; #endif #ifdef CRAY_SHMEM { /* adjust according the remote process raw address */ long bytes = (long) ((char*)pend-(char*)pstart); extern void* armci_shmalloc_remote_addr(void *ptr, int proc); pstart = armci_shmalloc_remote_addr(pstart, proc); pend = (char*)pstart + bytes; } #endif #ifdef SGIALTIX if (proc == armci_me) { pstart = shmem_ptr(pstart,armci_me); pend = shmem_ptr(pend,armci_me); } /* In SGI Altix processes are attached to a shmem region at different addresses. Addresses written to memlock table must be adjusted to the node master */ if(ARMCI_Uses_shm()) { int i, seg_id=-1; size_t tile_size,offset; void *start_addr, *end_addr; for(i=0; i<seg_count; i++) { tile_size = armci_memoffset_table[i].tile_size; start_addr = (void*) ((char*)armci_memoffset_table[i].seg_addr + proc*tile_size); end_addr = (void*) ((char*)start_addr + armci_memoffset_table[i].seg_size); /* CHECK: because of too much "span" in armci_lockmem_patch in * strided.c, it is not possible to have condition as (commented):*/ /*if(pstart>=start_addr && pend<=end_addr) {seg_id=i; break;}*/ if(pstart >= start_addr && pstart <= end_addr) { seg_id=i; break; } } if(seg_id==-1) armci_die("armci_lockmem: Invalid segment", seg_id); offset = armci_memoffset_table[seg_id].mem_offset; pstart = ((char*)pstart + offset); pend = ((char*)pend + offset); } #endif while(1) { NATIVE_LOCK(lock,proc); armci_get(memlock_table, table, sizeof(table), proc); /* armci_copy(memlock_table, table, sizeof(table));*/ /* inspect the table */ conflict = 0; avail =-1; for(slot = 0; slot < MAX_SLOTS; slot ++) { /* nonzero starting address means the slot is occupied */ if(table[slot].start == NULL) { /* remember a free slot to store address range */ avail = slot; } else { /*check for conflict: overlap between stored and current range*/ if( (pstart >= table[slot].start && pstart <= table[slot].end) || (pend >= table[slot].start && pend <= table[slot].end) ) { conflict = 1; break; } /* printf("%d: locking %ld-%ld (%d) conflict\n", armci_me, */ } } if(avail != -1 && !conflict) break; NATIVE_UNLOCK(lock,proc); armci_waitsome( ++turn ); } /* we got the memory lock: enter address into the table */ table[avail].start = pstart; table[avail].end = pend; armci_put(table+avail,memlock_table+avail,sizeof(memlock_t),proc); FENCE_NODE(proc); NATIVE_UNLOCK(lock,proc); locked_slot = avail; #endif /* ! ARMCIX */ }