Beispiel #1
0
/*
 * Flat-tree max reduction rooted at PE #0.
 *
 * The PE whose _world_rank is 0 visits every PE in [0, _world_size), obtains
 * a direct pointer to that PE's `source` buffer with shmem_ptr(), and folds
 * the first `nreduce` elements into its own `source` using REDUCE_MAX.  It
 * then writes the reduced values into every PE's `target` buffer through
 * shmem_ptr() as well.  Every PE, root or not, finishes by entering
 * shmem_barrier_all().
 *
 * Notes:
 *   - the root's `source` buffer is overwritten in place with the result;
 *   - the pointers returned by shmem_ptr() are dereferenced without a NULL
 *     check.
 */
void
flat_tree (STREAM_TYPE * target, STREAM_TYPE * source, int nreduce)
{
  const int is_root = (_world_rank == 0);

  if (is_root)
    {
      int pe;
      int idx;

      /* Gather phase: fold each PE's contribution into the root's source. */
      for (pe = 0; pe < _world_size; ++pe)
        {
          STREAM_TYPE *peer_src = (STREAM_TYPE *) shmem_ptr (source, pe);

          for (idx = 0; idx < nreduce; ++idx)
            source[idx] = REDUCE_MAX (peer_src[idx], source[idx]);
        }

      /* Broadcast phase: push the reduced values to each PE's target. */
      for (pe = 0; pe < _world_size; ++pe)
        {
          STREAM_TYPE *peer_dst = (STREAM_TYPE *) shmem_ptr (target, pe);

          for (idx = 0; idx < nreduce; ++idx)
            peer_dst[idx] = source[idx];
        }
    }

  /* Non-root PEs wait here until the root has finished writing. */
  shmem_barrier_all ();
}
Beispiel #2
0
/*
 * Append an entry for the segment starting at `ptr` to armci_memoffset_table.
 *
 * ptr       local address of the segment; must not be NULL
 * seg_size  size stored in the entry's seg_size field
 *
 * The entry records:
 *   seg_addr   - the address shmem_ptr() reports for `ptr` on armci_master,
 *                as seen from the calling process;
 *   seg_size   - `seg_size` as passed in;
 *   tile_size  - the per-process stride computed by armci_altix_gettilesize();
 *   mem_offset - the difference between the address armci_master obtained
 *                for its own copy (broadcast with armci_msg_brdcst) and the
 *                caller's view of that same copy.
 *
 * Calls armci_die() when `ptr` is NULL, when the table already holds
 * MAX_SEGS entries, or when the temporary pointer array cannot be allocated.
 * On success seg_count is incremented.
 */
void armci_memoffset_table_newentry(void *ptr, size_t seg_size) {

    void **ptr_arr;
    void *master_addr = NULL;
    size_t tile_size=0, offset=0;

    if(!ptr) armci_die("armci_memoffset_table_newentry : null ptr",0);

    if(seg_count >= MAX_SEGS) /* CHECK: make it dynamic */
        armci_die("armci_altix_allocate: Increase MAX_SEGS > 512", armci_me);

    /* Only the master resolves its own address; everyone receives it. */
    if(armci_me == armci_master) master_addr = shmem_ptr(ptr, armci_me);
    armci_msg_brdcst(&master_addr, sizeof(void*), armci_master);

    /* Scratch array: one slot per process, filled by the tile-size helper. */
    ptr_arr = malloc((size_t)armci_nproc * sizeof *ptr_arr);
    if(!ptr_arr)
        armci_die("armci_memoffset_table_newentry : malloc failed", armci_me);

    armci_altix_gettilesize(ptr, ptr_arr, &tile_size);
    offset = (size_t)((char*)master_addr -  (char*)ptr_arr[armci_master]);

    /* enter in memoffset table */
    armci_memoffset_table[seg_count].seg_addr   = ptr_arr[armci_master];
    armci_memoffset_table[seg_count].seg_size   = seg_size;
    armci_memoffset_table[seg_count].tile_size  = tile_size;
    armci_memoffset_table[seg_count].mem_offset = offset;

#if DEBUG_
    /* size_t values are cast so the format specifiers match their arguments */
    printf("%d: addr=%p seg_size=%lu tile_size=%lu offset=%lu\n", armci_me,
           ptr_arr[armci_master], (unsigned long)seg_size,
           (unsigned long)tile_size, (unsigned long)offset);
#endif

    ++seg_count;
    free(ptr_arr);
}
/*
 * shmem_ptr() demonstration.
 *
 * PE 0 asks shmem_ptr() for a direct pointer to PE 1's copy of the static
 * array `bigd` and, when one is returned, fills it with 1..100.  After a
 * global barrier, PE 1 prints its copy of the array.
 *
 * Returns 0 so that a normal run is not reported to the launcher as a
 * failure.
 *
 * NOTE(review): there is no shmem_finalize() call paired with shmem_init()
 * here; confirm whether the targeted OpenSHMEM version requires one.
 * NOTE(review): the program addresses PE 1 unconditionally, so it presumably
 * expects to be launched with at least two PEs -- confirm.
 */
int main(void)
{
   static int bigd[100];
   int *ptr;
   int i;

   shmem_init();

   if (shmem_my_pe() == 0) {
      /* initialize PE 1's bigd array */
      ptr = shmem_ptr(bigd, 1);
      if (ptr == NULL)
         printf("can't use pointer to directly access PE 1's array\n");
      else
         for (i=0; i<100; i++)
            *ptr++ = i+1;
   }

   /* PE 1 must not read bigd before PE 0 has finished writing it. */
   shmem_barrier_all();

   if (shmem_my_pe() == 1) {
      printf("bigd on PE 1 is:\n");
      for (i=0; i<100; i++)
         printf(" %d\n",bigd[i]);
      printf("\n");
   }
   return 0;
}
Beispiel #4
0
/* SGI Altix Stuff */

/*
 * Fill ptr_arr[0 .. armci_nproc-1] with the address shmem_ptr() reports for
 * `ptr` on each process, and store in *tile_size the distance in bytes
 * between the addresses of consecutive processes.
 *
 * From the third process onward, the distance to the previous process is
 * compared against the value already held in *tile_size; a mismatch is
 * reported through armci_die().  With a single process *tile_size is set
 * to 0.  The addresses returned by shmem_ptr() are not checked for NULL.
 */
static void armci_altix_gettilesize(void *ptr, void **ptr_arr,
                                    size_t *tile_size) {
    size_t stride = 0;
    int pe;

    for(pe = 0; pe < armci_nproc; ++pe) {
        ptr_arr[pe] = shmem_ptr(ptr, pe);

        if(pe >= 1) {
            stride = (size_t)((char*)ptr_arr[pe] - (char*)ptr_arr[pe-1]);

            /* the first stride (pe == 1) has nothing to be compared with */
            if(pe >= 2 && stride != *tile_size) {
                armci_die("armci_memoffset_table_newentry:Inconsistent tile size",
                          armci_me);
            }
        }

        *tile_size = stride;
    }
}
Beispiel #5
0
/*
 * Allocate a block from the altix_ctx_shmem allocator context and report
 * every process's address for it.
 *
 * ptr_arr  output array with armci_nproc slots; slot armci_me receives the
 *          local pointer, every other slot receives shmem_ptr(local, i)
 * bytes    requested size; it is first combined across processes with
 *          armci_msg_lgop(..., "max") (presumably a global maximum -- TODO
 *          confirm) and the combined value is what gets allocated
 *
 * Calls armci_die() when the combined size is non-zero and kr_malloc()
 * returned NULL.
 */
void armci_altix_shm_malloc(void *ptr_arr[], armci_size_t bytes)
{
    long max_bytes = bytes;
    void *local_seg;
    int pe;

    ARMCI_PR_DBG("enter",0);

    armci_msg_lgop(&max_bytes, 1, "max");
    local_seg = kr_malloc((size_t)max_bytes, &altix_ctx_shmem);

    /* start from an all-NULL table, then record our own pointer */
    bzero(ptr_arr, (armci_nproc)*sizeof(void*));
    ptr_arr[armci_me] = local_seg;

    if(local_seg == NULL && max_bytes != 0) {
        armci_die("armci_altix_shm_malloc(): malloc failed", armci_me);
    }

    /* resolve the address of the same block on every other process */
    for(pe = 0; pe < armci_nproc; pe++) {
        if(pe == armci_me) continue;
        ptr_arr[pe] = shmem_ptr(local_seg, pe);
    }

    ARMCI_PR_DBG("exit",0);
}
Beispiel #6
0
/*
 * Group variant of the Altix shared-memory allocation: allocate a block from
 * the altix_ctx_shmem_grp allocator context and report each group member's
 * address for it.
 *
 * ptr_arr  output array with one slot per group member (indexed by group
 *          rank); the caller's own slot receives the local pointer, the
 *          others receive shmem_ptr(local, ARMCI_Absolute_id(group, i))
 * bytes    requested size; combined over the group with
 *          armci_msg_group_lgop(..., "max", group) before allocating
 * group    group whose size and rank are queried
 *
 * Calls armci_die() when the combined size is non-zero and kr_malloc()
 * returned NULL.
 */
void armci_altix_shm_malloc_group(void *ptr_arr[], armci_size_t bytes,
                                  ARMCI_Group *group) {
    long max_bytes = bytes;
    void *local_seg;
    int member;
    int my_rank;
    int member_count;
    /* fetched as in the original code; not used further in this function */
    armci_grp_attr_t *grp_attr = ARMCI_Group_getattr(group);

    ARMCI_PR_DBG("enter",0);

    ARMCI_Group_size(group, &member_count);
    ARMCI_Group_rank(group, &my_rank);

    armci_msg_group_lgop(&max_bytes, 1, "max", group);
    local_seg = kr_malloc((size_t)max_bytes, &altix_ctx_shmem_grp);
    if(local_seg == NULL && max_bytes != 0) {
        armci_die("armci_altix_shm_malloc_group(): malloc failed for groups. Increase _SHMMAX_ALTIX_GRP", armci_me);
    }

    /* start from an all-NULL table, then record our own pointer */
    bzero(ptr_arr, (member_count)*sizeof(void*));
    ptr_arr[my_rank] = local_seg;

    /* shmem_ptr() is given the absolute id that corresponds to each rank */
    for(member = 0; member < member_count; member++) {
        if(member == my_rank) continue;
        ptr_arr[member] = shmem_ptr(local_seg, ARMCI_Absolute_id(group, member));
    }

    ARMCI_PR_DBG("exit",0);
}
Beispiel #7
0
/*
 * Fortran-callable wrapper around shmem_ptr(): the PE number arrives by
 * reference and is dereferenced before being forwarded.
 * Returns whatever shmem_ptr() returns for (target, *pe).
 */
void *FORTRANIFY (shmem_ptr) (void *target, int *pe)
{
    const int pe_number = *pe;
    void *remote_addr = shmem_ptr (target, pe_number);

    return remote_addr;
}
Beispiel #8
0
/*
 * Fortran binding for shmem_ptr().
 *
 * `target` is converted with FPTR_2_VOID_PTR and the by-reference PE number
 * with OMPI_FINT_2_INT; the result of shmem_ptr() is returned cast to
 * FORTRAN_POINTER_T *.
 */
FORTRAN_POINTER_T* shmem_ptr_f(FORTRAN_POINTER_T target, MPI_Fint *pe)
{
    void *remote_addr = shmem_ptr(FPTR_2_VOID_PTR(target), OMPI_FINT_2_INT(*pe));

    return (FORTRAN_POINTER_T *)remote_addr;
}
Beispiel #9
0
/*
 * Acquire an exclusive LOCK on the MEMORY area <start,end> owned by process
 * "proc".
 *   . only one area can be locked at a time by the calling process
 *   . must unlock it with armci_unlockmem
 *
 * start, end  bounds of the area (treated as an inclusive range)
 * proc        process that owns the area
 *
 * With ARMCIX defined the call is forwarded to ARMCIX_Lockmem().  Otherwise
 * the function:
 *   1. normalises the range (optional offset correction, cache-line
 *      alignment, and platform-specific address translation);
 *   2. repeatedly takes the native lock, fetches proc's memlock table into
 *      the buffer `table` (declared outside this function), and scans it for
 *      a free slot and for any stored range that overlaps the request;
 *   3. once a free slot exists and nothing overlaps, writes the range into
 *      that slot on proc, fences, releases the native lock and remembers the
 *      slot in locked_slot.
 * While a conflict exists (or no slot is free) the native lock is released
 * and armci_waitsome() is called with an increasing counter before retrying.
 */
void armci_lockmem(void *start, void *end, int proc)
{
#ifdef ARMCIX
    ARMCIX_Lockmem (start, end, proc);
#else
    register void* pstart, *pend;
    register  int slot, avail=0;
    int turn=0, conflict=0;
    memlock_t *memlock_table;
#if defined(CLUSTER) && !defined(SGIALTIX)
    int lock = (proc-armci_clus_info[armci_clus_id(proc)].master)%NUM_LOCKS;
#else
    int lock = 0;
#endif

#ifdef CORRECT_PTR
    if(! *armci_use_memlock_table) {
        /* if offset invalid, use dumb locking scheme ignoring addresses */
        armci_lockmem_(start, end, proc);
        return;
    }

#  ifndef SGIALTIX
    /* when processes are attached to a shmem region at different addresses,
     * addresses written to memlock table must be adjusted to the node master
     */
    if(armci_mem_offset) {
        start = armci_mem_offset + (char*)start;
        end   = armci_mem_offset + (char*)end;
    }
#  endif
#endif

    if(DEBUG_) {
        printf("%d: calling armci_lockmem for %d range %p -%p\n",
               armci_me, proc, start,end);
        fflush(stdout);
    }
    memlock_table = (memlock_t*)memlock_table_array[proc];


#ifdef ALIGN_ADDRESS
    /* align address range on cache line boundary to avoid false sharing */
    pstart = ALIGN_ADDRESS(start);
    pend = CALGN -1 + ALIGN_ADDRESS(end);
#else
    pstart=start;
    pend =end;
#endif

#ifdef CRAY_SHMEM
    {   /* adjust according the remote process raw address */
        long bytes = (long) ((char*)pend-(char*)pstart);
        extern void* armci_shmalloc_remote_addr(void *ptr, int proc);
        pstart = armci_shmalloc_remote_addr(pstart, proc);
        pend   = (char*)pstart + bytes;
    }
#endif
#ifdef SGIALTIX
    if (proc == armci_me) {
        pstart = shmem_ptr(pstart,armci_me);
        pend = shmem_ptr(pend,armci_me);
    }
    /* In SGI Altix  processes are attached to a shmem region at different
    addresses. Addresses written to memlock table must be adjusted to
    the node master
     */
    if(ARMCI_Uses_shm()) {
        int i, seg_id=-1;
        size_t tile_size,offset;
        void *start_addr, *end_addr;
        /* find the registered segment whose tile for proc contains pstart */
        for(i=0; i<seg_count; i++) {
            tile_size = armci_memoffset_table[i].tile_size;
            start_addr = (void*) ((char*)armci_memoffset_table[i].seg_addr +
                                  proc*tile_size);
            end_addr = (void*) ((char*)start_addr +
                                armci_memoffset_table[i].seg_size);
            /* CHECK: because of too much "span" in armci_lockmem_patch in
             * strided.c, it is not possible to have condition as (commented):*/
            /*if(pstart>=start_addr && pend<=end_addr) {seg_id=i; break;}*/
            if(pstart >= start_addr && pstart <= end_addr) {
                seg_id=i;
                break;
            }
        }
        if(seg_id==-1) armci_die("armci_lockmem: Invalid segment", seg_id);

        offset = armci_memoffset_table[seg_id].mem_offset;
        pstart = ((char*)pstart + offset);
        pend = ((char*)pend + offset);
    }
#endif

    while(1) {
        NATIVE_LOCK(lock,proc);

        armci_get(memlock_table, table, sizeof(table), proc);
        /*        armci_copy(memlock_table, table, sizeof(table));*/

        /* inspect the table */
        conflict = 0;
        avail =-1;
        for(slot = 0; slot < MAX_SLOTS; slot ++) {

            /* nonzero starting address means the slot is occupied */
            if(table[slot].start == NULL) {

                /* remember a free slot to store address range */
                avail = slot;

            } else {
                /* check for conflict: two inclusive ranges overlap exactly
                 * when each one starts no later than the other one ends.
                 * Testing only whether pstart or pend lies inside the stored
                 * range would miss a request that fully encloses it. */
                if(pstart <= table[slot].end && pend >= table[slot].start) {

                    conflict = 1;
                    break;

                }
            }
        }

        if(avail != -1 && !conflict) break;

        /* back off: release the lock so the current holder can make progress */
        NATIVE_UNLOCK(lock,proc);
        armci_waitsome( ++turn );

    }

    /* we got the memory lock: enter address into the table */
    table[avail].start = pstart;
    table[avail].end = pend;
    armci_put(table+avail,memlock_table+avail,sizeof(memlock_t),proc);

    FENCE_NODE(proc);

    NATIVE_UNLOCK(lock,proc);
    locked_slot = avail;
#endif /* ! ARMCIX */
}