/*
 * Create shared region to store kr_malloc context in shared memory.
 *
 * In the statements below only the process with armci_me == armci_master
 * does any work: it creates a region of SHMEM_CTX_MEM bytes, writes the
 * region's own address into the first pointer-sized slot, and copies
 * ctx_winshmem into the region immediately after that slot.
 */
void armci_krmalloc_init_ctxwinshmem() {
    void *myptr=NULL;
    long idlist[SHMIDLEN];
    long size;
    /* number of ints that one pointer occupies; used further down to step
       over the address stored at the start of the region */
    int offset = sizeof(void*)/sizeof(int);

    /* to store shared memory context and myptr */
    size = SHMEM_CTX_MEM;

    if(armci_me == armci_master ){
       /* idlist[0] and idlist+1 are both handed to Create_Shared_Region;
          presumably it fills them with the region id/key -- confirm
          against its definition */
       myptr = Create_Shared_Region(idlist+1,size,idlist);
       /* a NULL result is fatal only for a non-zero request; the value
          passed to armci_die is the size divided by 1024 */
       if(!myptr && size>0 ) armci_die("armci_krmalloc_init_ctxwinshmem: could not create",
                 (int)(size>>10));

       /* master publishes its own mapping address at the start of the region */
       if(size) *(volatile void**)myptr = myptr;

       /* NOTE(review): from here on myptr is dereferenced without a size
          check (debug print and the context copy); this is only safe if
          SHMEM_CTX_MEM is non-zero -- confirm */
       if(DEBUG){
          printf("%d:armci_krmalloc_init_ctxwinshmem addr mptr=%p ref=%p size=%ld\n",
                 armci_me, myptr, *(void**)myptr, size);
          fflush(stdout);
       }

       /* Bootstrapping: place ctx_winshmem_global inside the region.
          NOTE: it starts `offset` ints past the beginning, because the
          master stored its address at the beginning for others to see */
       ctx_winshmem_global = (context_t*) ( ((int*)myptr)+offset );
       *ctx_winshmem_global=ctx_winshmem;/* master copies ctx into the shared region */
    }
/*
 * Collective Memory Allocation on shared memory systems.
 *
 * ptr_arr - array of armci_nproc pointers; it is zeroed on entry.
 * bytes   - number of bytes requested by the calling process.
 *
 * Steps performed by the statements below:
 *   1. every process contributes `bytes` to size_arr and the array is
 *      combined across processes with armci_msg_lgop(..., "+");
 *   2. the requests of the nproc processes starting at index armci_master
 *      are summed into `size`;
 *   3. the master obtains the memory (kr_malloc or Create_Shared_Region);
 *   4. idlist is broadcast within the cluster node and every non-master
 *      process attaches to the region.
 * All failures are reported through armci_die.
 */
void armci_shmem_malloc(void *ptr_arr[], armci_size_t bytes) {
    void *myptr=NULL, *ptr=NULL;
    long idlist[SHMIDLEN];
    long size=0, offset=0;
    long *size_arr;
    void **ptr_ref_arr;
    int i,cn, len;
    /* number of processes on this cluster node, taken from armci_clus_info */
    int nproc = armci_clus_info[armci_clus_me].nslave;

    ARMCI_PR_DBG("enter",0);
    bzero((char*)ptr_arr,armci_nproc*sizeof(void*));

    /* allocate work arrays */
    size_arr = (long*)calloc(armci_nproc,sizeof(long));
    if(!size_arr)armci_die("armci_malloc:calloc failed",armci_nproc);

    /* allocate arrays for cluster address translations:
       one slot per cluster node with DATA_SERVER, otherwise one per local process */
# if defined(DATA_SERVER)
    len = armci_nclus;
# else
    len = nproc;
# endif

    ptr_ref_arr = calloc(len,sizeof(void*)); /* must be zero */
    if(!ptr_ref_arr)armci_die("armci_malloc:calloc 2 failed",len);

    /* combine all memory requests into size_arr: each process fills only its
       own slot (the rest are zero from calloc) before the "+" operation */
    size_arr[armci_me] = bytes;
    armci_msg_lgop(size_arr, armci_nproc, "+");

    /* determine aggregate request size on the cluster node */
    for(i=0, size=0; i< nproc; i++) size += size_arr[i+armci_master];

    /* master process creates shmem region and then others attach to it */
    if(armci_me == armci_master ){

       /* can malloc if there is no data server process and has 1 process/node */
       /* NOTE(review): on the kr_malloc path idlist is not written here before
          it is broadcast below; with nproc == 1 the attach branch is skipped
          by the master itself -- confirm no other receiver reads it */
# ifndef RMA_NEEDS_SHMEM
       if(nproc == 1) myptr = kr_malloc(size, &ctx_localmem);
       else
# endif
          myptr = Create_Shared_Region(idlist+1,size,idlist);
       /* NULL is fatal only for a non-zero aggregate request;
          the value passed to armci_die is the size divided by 1024 */
       if(!myptr && size>0 )armci_die("armci_malloc: could not create", (int)(size>>10));

       /* place its address at beginning of attached region for others to see */
       if(size)armci_master_exp_attached_ptr(myptr);

       if(DEBUG_){
          printf("%d:armci_malloc addr mptr=%p size=%ld\n",armci_me,myptr,size);
          fflush(stdout);
       }
    }

    /* broadcast shmem id to other processes on the same cluster node */
    armci_msg_clus_brdcst(idlist, SHMIDLEN*sizeof(long));

    if(armci_me != armci_master){
       myptr=(double*)Attach_Shared_Region(idlist+1,size,idlist[0]);
       /* NOTE(review): unlike the master's check above, a NULL result is fatal
          here even when size == 0 -- confirm Attach_Shared_Region never
          returns NULL for an empty request */
       if(!myptr)armci_die("armci_malloc: could not attach", (int)(size>>10));

       /* now every process in a SMP node needs to find out its offset
        * w.r.t. master - this offset is necessary to use memlock table
        */
       if(size) armci_set_mem_offset(myptr);
       /* the debug print reads the pointer stored at the start of the region */
       if(DEBUG_){
          printf("%d:armci_malloc attached addr mptr=%p ref=%p size=%ld\n",
                 armci_me,myptr, *(void**)myptr,size);
          fflush(stdout);
       }
    }
void test() { double *a, start=1., end=-1.; int len=100; long size = len*sizeof(double); long idlist[SHMIDLEN]; int numlock=10, i; lockset_t lockid; /* shared memory test */ if(me==0){ printf("Test shared memory\n"); a=(double*)Create_Shared_Region(idlist+1,size,idlist); assert(a); a[0]= start; a[len-1]=end; } MPI_Bcast(idlist,SHMIDLEN,MPI_LONG,0,MPI_COMM_WORLD); if(me){ a=(double*)Attach_Shared_Region(idlist+1,size,idlist[0]); assert(a); } if(me==nproc-1){ printf("%d: start=%f end=%f\n",me,a[0],a[len-1]); if(a[0]== start && a[len-1]== end) printf("Works!\n"); } /*printf("%d: a=%x\n",me,a); */ MPI_Barrier(MPI_COMM_WORLD); /* allocate locks */ if(me == 0){ a[0]=0.; CreateInitLocks(numlock, &lockid); printf("\nMutual exclusion test\n"); } MPI_Bcast(&lockid,sizeof(lockid),MPI_BYTE,0,MPI_COMM_WORLD); if(me)InitLocks(numlock, lockid); /* mutual exclusion test: * everybody increments shared variable 1000 times */ # define TIMES 1000 MPI_Barrier(MPI_COMM_WORLD); for(i=0;i<TIMES; i++){ NATIVE_LOCK(0); a[0]++; NATIVE_UNLOCK(0); } MPI_Barrier(MPI_COMM_WORLD); if(me==nproc-1){ printf("value of shared variable =%f should be %f\n", a[0],1.0*nproc*TIMES); if(a[0]==1.0*nproc*TIMES ) printf("Works!\n\n"); } /* cleanup of IPC resources */ if(me==0){ DeleteLocks(lockid); Delete_All_Regions(); } MPI_Barrier(MPI_COMM_WORLD); }