Example #1
void DDI_ARMCI_Memory_init(size_t size) {
  int code;
  const DDI_Comm *comm = (const DDI_Comm *) Comm_find(DDI_COMM_WORLD);
  
  // malloc ARMCI memory
  code = ARMCI_Malloc((void*)gv(armci_mem_addr),size);
  if (code > 0) {
    ARMCI_Error("ARMCI_Malloc failed",code);
    Fatal_error(911);
  }
  gv(dda_index) = (DDA_Index*)gv(armci_mem_addr)[comm->me];

  // malloc ARMCI counter block and set addresses
  code = ARMCI_Malloc((void*)gv(armci_cnt_addr),sizeof(armci_counter_t)*2);
  if (code > 0) {
    ARMCI_Error("ARMCI_Malloc failed",code);
    Fatal_error(911);
  }
  ARMCI_PutValueLong(0, (void*)(gv(armci_cnt_addr)[comm->me]+0), comm->me);
  ARMCI_PutValueLong(0, (void*)(gv(armci_cnt_addr)[comm->me]+1), comm->me);
  DDI_ARMCI_DLB_addr();
  DDI_ARMCI_GDLB_addr();
  
  // create mutexes
  code = ARMCI_Create_mutexes(MAX_DD_ARRAYS+1);
  if (code > 0) {
    ARMCI_Error("ARMCI_Create_mutexes failed",code);
    Fatal_error(911);
  }
  gv(dlb_access) = MAX_DD_ARRAYS;
}
Example #2
   void SMP_destroy(int handle) {

      SMP_Data *node = (SMP_Data *) gv(smp_base_data);
      SMP_Data *prev = (SMP_Data *) gv(smp_base_data);

      DEBUG_OUT(LVL3,(stdout,"%s: smp_destroy - %i.\n",DDI_Id(),handle)) 
     
      if(node == NULL) {
         fprintf(stdout,"%s: Warning no SMP arrays to destroy.\n",DDI_Id());
         Fatal_error(911);
      }
     
      while(node) {
         if(node->handle == handle) {
          # if defined USE_SYSV
            MAX_DEBUG((stdout,"%s: detaching from shm addr %x, removing semid=%i.\n",
                    DDI_Id(),node->addr,node->semid))
            shmdt(node->addr);
            DDI_Sem_remove(node->semid);
          # else
            free(node->addr);
          # endif
            if(node == gv(smp_base_data)) gv(smp_base_data) = (SMP_Data *) node->next;
            else prev->next = node->next;
            free(node);
            return;
         }
         prev = (SMP_Data *) node;
         node = (SMP_Data *) node->next;
      }

      fprintf(stdout,"%s: Shared Memory handle %i was not found.\n",DDI_Id(),handle);
      Fatal_error(911);

   }
Example #3
   void DDI_ISend(void *buffer,size_t size,int to,int *req_val) {

      int global_pid;
      DDI_Request *req = &gv(isend_req);
      const DDI_Comm *comm = (const DDI_Comm *) Comm_find(DDI_WORKING_COMM);
    
    # if defined DDI_CHECK_ARGS
      if(to < 0 || to > comm->np) {
         fprintf(stdout,"%s: can not send to ddi process %i.\n",DDI_Id(),to);
         Fatal_error(911);
      }
    # endif

      
    # if defined DDI_SOC && !defined DDI_MPI
      pthread_attr_t thread_attr;
      pthread_attr_init(&thread_attr);
      pthread_attr_setscope(&thread_attr,PTHREAD_SCOPE_SYSTEM);
      req->to     = to;
      req->size   = size;
      req->buffer = buffer;
      if(pthread_create(&req->hnd_thread,&thread_attr,DDI_ISend_thread,req) != 0) {
         fprintf(stderr,"%s: pthread_create failed in DDI_ISend.\n",DDI_Id());
         Fatal_error(911);
      }
    # endif
     
    # if defined DDI_MPI
      MPI_Isend(buffer,size,MPI_BYTE,to,1,comm->compute_comm,req);
    # endif
     
      ULTRA_DEBUG((stdout,"%s: non-blocking send to %i issued.\n",DDI_Id(),to))

      *req_val = 1;
   }
Example #4
   void Comm_divide_custom(int ngroups, int *list, int comm_id, int *new_comm_id) {

      int nt;
      int i,in,ip,mygroup,sp,ep,np;
      const DDI_Comm *cur_comm = (const DDI_Comm *) Comm_find(comm_id);

      int *my_ids      = NULL;
      int *sn_by_group = (int *) Malloc((ngroups+1)*sizeof(int));

      if(ngroups <=0 || ngroups > cur_comm->nn) {
         fprintf(stdout,"%s: ngroups=%i (arg #1 of DDI_Comm_divide) is invalid.\n",DDI_Id,ngroups);
         Fatal_error(911);
      }

      for(i=0,nt=0; i<ngroups; i++) nt += list[i];

      if(nt != cur_comm->nn) {
         fprintf(stdout," DDI: invalid list of groups sizes in divide_custom.\n");
         Fatal_error(911);
      } 

      for(i=0,in=0; i<ngroups; i++) {
         sn_by_group[i] = in;
         in += list[i];
      }
      sn_by_group[ngroups] = in;

      mygroup = 0;
      while(mygroup < ngroups && sn_by_group[mygroup+1] <= cur_comm->my) mygroup++;

      if(mygroup == ngroups) {
         fprintf(stdout,"%s: unable to find my spot in the new groups.\n",DDI_Id());
         Fatal_error(911);
      }

      DEBUG_OUT(LVL4,(stdout,"%s: mygroup=%i\n",DDI_Id(),mygroup))
      
      sp = cur_comm->node_master[sn_by_group[mygroup]];

      if(mygroup+1 == ngroups) ep = cur_comm->np;
      else                     ep = cur_comm->node_master[sn_by_group[mygroup+1]];
      np = ep - sp;

      my_ids = (int *) Malloc(np*sizeof(int));

      for(ip=0,i=0; ip<cur_comm->np; ip++) {
         if(cur_comm->global_pid[ip] >= sp && cur_comm->global_pid[ip] < ep) my_ids[i++]=ip;
      }

      if(i != np) {
         fprintf(stdout,"%s: could not find %i processes expected for the new comm.\n",DDI_Id(),np);
         Fatal_error(911);
      }

      Comm_create(np,my_ids,ngroups,mygroup,comm_id,new_comm_id);

      free(my_ids);
      free(sn_by_group);
   }
Example #5
   void DDI_Comm_destroy(int commid) {
      const DDI_Comm *comm = (const DDI_Comm *) Comm_find(commid);

      DDI_Comm *curr_comm = &gv(ddi_base_comm);
      DDI_Comm *prev_comm = NULL;
/*
      DDI_Data *curr_data = &gv(ddi_base_data);
      DDI_Data *prev_data = NULL;
*/
      Comm_sync(124,comm);

      if(commid == DDI_COMM_WORLD) {
         fprintf(stdout,"%s: Cannot destroy DDI_COMM_WORLD.\n",DDI_Id());
         Fatal_error(911);
      }

      while(curr_comm->next && curr_comm->id != commid) {
         prev_comm = curr_comm;
         curr_comm = (DDI_Comm *) curr_comm->next;
      }

      if(curr_comm->id != commid) {
         fprintf(stdout,"%s: Error in DDI_Comm_destroy - Comm not found.\n",DDI_Id());
         Fatal_error(911);
      }
/*
      while(curr_data->next && curr_data->id != curr_comm->data_id) {
         prev_data = curr_data;
         curr_data = (DDI_Data *) curr_data->next;
      }

      if(curr_data->id != curr_comm->data_id) {
         fprintf(stdout,"%s: Error in DDI_Comm_destroy - Data not found.\n",DDI_Id());
         Fatal_error(911);
      }

    * ----------------------------------------------------------------------- *\
      Delete item from DDI_Data linked-list.
   \* ----------------------------------------------------------------------- *
      if(curr_comm->me_local == 0) shmdt((void *) curr_data->sync_array);
      prev_data->next = curr_data->next;
      free(curr_data);
*/
  
   /* ----------------------------------------------------------------------- *\
      Delete item from DDI_Comm linked-list.
   \* ----------------------------------------------------------------------- */
      free(curr_comm->smp_pid);
      free(curr_comm->local_nid);
      free(curr_comm->global_pid);
      free(curr_comm->global_nid);
      free(curr_comm->global_dsid);
      free(curr_comm->node_master);
      prev_comm->next = curr_comm->next;
      free(curr_comm);

   }
Example #6
   void Comm_divide(int ngroups, int comm_id, int *new_comm_id) {

      int i,in,nt,npg,nr;
      const DDI_Comm *cur_comm = (const DDI_Comm *) Comm_find(comm_id);

      int *list = (int *) Malloc(ngroups*sizeof(int));

      if(ngroups <=0 || ngroups > cur_comm->nn) {
         fprintf(stdout,"%s: ngroups=%i (arg #1 of DDI_Comm_divide) is invalid.\n",DDI_Id,ngroups);
         Fatal_error(911);
      }

      nt  = cur_comm->nn;
      npg = nt / ngroups;
      nr  = nt % ngroups;
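      /* e.g. nt=10 nodes, ngroups=4 ==> npg=2, nr=2, giving group sizes 3,3,2,2 */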

      for(i=0,in=0; i<ngroups; i++) {
         list[i] = npg;
         if(i < nr) list[i]++;
      }

      Comm_divide_custom(ngroups,list,comm_id,new_comm_id);

      free(list);
   }
Example #7
   void Comm_divide(int ngroups, int comm_id, int *new_comm_id) {

      int i,in,nt,npg,nr;
      int err;
      const DDI_Comm *cur_comm = (const DDI_Comm *) Comm_find(comm_id);

      int *list = (int *) Malloc(ngroups*sizeof(int));

      if(ngroups <=0 || ngroups > cur_comm->nn) {
         fprintf(stdout,"%s: ngroups=%i (arg #1 of DDI_Comm_divide) is invalid.\n",DDI_Id,ngroups);
         Fatal_error(911);
      }

      nt  = cur_comm->nn;
      npg = nt / ngroups;
      nr  = nt % ngroups;

      for(i=0,in=0; i<ngroups; i++) {
         list[i] = npg;
         if(i < nr) list[i]++;
      }

#if defined DDI_BGL
      if (DDI_BGL_Comm_divide_custom(ngroups, list, comm_id, new_comm_id) != 0) {
	  if (cur_comm->me == 0) printf("%s: Groups are not aligned to BG/L psets.\n", DDI_Id());
	  Comm_divide_custom(ngroups,list,comm_id,new_comm_id);
      }
#else
      Comm_divide_custom(ngroups,list,comm_id,new_comm_id);
#endif

      free(list);
   }
Example #8
/* -------------------------------------------------------- *\
   DDI_GDLBReset()
   ===============
   
   Reset the global dynamic load-balance counter.
\* -------------------------------------------------------- */
   void DDI_GDLBReset() {
      int np,me;

    # if !defined USE_SYSV
      int remote_id = 0;
      DDI_Patch Patch;
    # endif

      if(DDI_WORKING_COMM != DDI_COMM_WORLD) {
         fprintf(stdout," DDI Error: group dlb counter must be reset in DDI_WORLD scope.\n");
         Fatal_error(911);
      }

      DDI_NProc(&np,&me);
      DDI_Sync(3041);

#if defined DDI_ARMCI
      DDI_ARMCI_GDLBReset();
#else
      if(me == 0) {
    # if defined USE_SYSV
         DDI_GDLBReset_local();
    # else
         Patch.oper = DDI_GDLBRESET;
         DDI_Send_request(&Patch,&remote_id,NULL);
    # endif
      }
#endif

      DDI_Sync(3042);
   }
Example #9
/* -------------------------------------- *\
   LAPI Active Message Completion Handler
\* -------------------------------------- */
   void DDI_AM_Compl_hndlr(lapi_handle_t *handl,void *param) {
   
      size_t nbytes;
      char *stack = (char *) param;
      DDI_Patch *patch = (DDI_Patch *) param;
      double *buffer = NULL;

      MAX_DEBUG((stdout,"%s: Entering LAPI Active Message Completion Handler.\n",DDI_Id()))

      nbytes  = sizeof(DDI_Patch);
      nbytes += (nbytes % sizeof(double));
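      /* the message arrived as [DDI_Patch header][data]; the padding above
         keeps the data buffer aligned on a double boundary */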
      
      stack += nbytes;
      buffer = (double *) stack;

      nbytes += patch->ds_buffer_size;
            
      switch(patch->oper) {
         case DDI_GET:  DDI_Get_lapi_server(patch,buffer);  break;
         case DDI_PUT:  DDI_Put_lapi_server(patch,buffer);  break;
         case DDI_ACC:  DDI_Acc_lapi_server(patch,buffer);  break;
         default:
            fprintf(stdout,"%s: unknown operation in data server handler.\n",DDI_Id());
            Fatal_error(911);
      }
      
      if(patch->oper == DDI_ACC) DDI_Fence_release(patch->handle);
      DDI_Memory_heap_free(param,nbytes);

      MAX_DEBUG((stdout,"%s: Leaving LAPI AM Completion Handler.\n",DDI_Id()))
   }
Example #10
/* ------------------------- *\
   System Wrapper for malloc
\* ------------------------- */
   void *Malloc(size_t size) {
      void *addr;
      if((addr = malloc(size)) == NULL) {
         fprintf(stderr," Error: malloc(%lu) failed.\n",(unsigned long) size);
         Fatal_error(911);
      }
      return addr;
   }
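
Because Malloc() calls Fatal_error on exhaustion, callers never need to test its return value. A minimal sketch of the calling pattern used throughout these examples (the function and array names here are illustrative):

   void malloc_sketch(int np) {
      int *ids = (int *) Malloc(np*sizeof(int));   /* aborts on failure, never NULL */
      /* ... use ids ... */
      free(ids);                                   /* released with plain free()    */
   }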
Example #11
/* ---------------------------------------------- *\
   FORTRAN Wrapper to return MPI SMP communicator
\* --------------------------------------------- */
   void F77_SMP_GetMPIComm(int_f77 *commid) {
      # ifdef DDI_MPI
        DDI_Comm *comm = Comm_find(DDI_WORKING_COMM);
        *commid = comm->smp_comm;
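        /* note: this assumes MPI_Comm is an integer handle (true for MPICH);
           with pointer-based MPI_Comm types the portable form would be
           *commid = (int_f77) MPI_Comm_c2f(comm->smp_comm); */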
      # else
        printf("DDI_SMP_GetMPIComm is only supported with an MPI build\n");
        Fatal_error(911);
      # endif
   }
Example #12
   void *SMP_find(int handle) {

      SMP_Data *data = (SMP_Data *) gv(smp_base_data);
      if(data == NULL) {
         fprintf(stdout,"%s: Warning.  No SMP Arrays allocated.\n",DDI_Id());
         Fatal_error(911);
      }

      do {
         if(data->handle == handle) return (void *) data;
         data = (SMP_Data *) data->next;
      } while(data);

      fprintf(stdout,"%s: Unable to find data struct associated with id %i.\n",DDI_Id(),handle);
      Fatal_error(911);

      return NULL;

   }
Example #13
   int SMP_create(size_t size) {

      int semflg = 0600;

      DDI_Comm *comm     = (DDI_Comm *) Comm_find(DDI_COMM_WORLD); /* hardcoded atm */
      SMP_Data *new_data = (SMP_Data *) Malloc(sizeof(SMP_Data));
      SMP_Data *end_data = (SMP_Data *) SMP_find_end();

      STD_DEBUG((stdout,"%s: Entered DDI_SMP_Create.\n",DDI_Id()))

      if(end_data) end_data->next = (void *) new_data;
      else gv(smp_base_data) = (SMP_Data *) new_data;

    # if defined USE_SYSV
      Comm_sync_smp(comm);
      if(comm->me_local == 0) { 
         new_data->handle = gv(smp_data_id)++; 
         new_data->shmid  = gv(shmid) = Shmget(IPC_PRIVATE,size,SHM_R|SHM_W);
         new_data->semid  = Semget(IPC_PRIVATE,1,semflg);
         new_data->size   = size;
         new_data->next   = NULL;
      }
      Comm_bcast_smp(new_data,sizeof(SMP_Data),0,comm);
      new_data->addr = Shmat(new_data->shmid,0,0);
      MAX_DEBUG((stdout,"%s: SMP memory [%i] shmid=%i, semid=%i, addr=%x.\n",
                 DDI_Id(),new_data->handle,new_data->shmid,new_data->semid,new_data->addr))
      Comm_sync_smp(comm);
      if(comm->me_local == 0) { Shmctl(new_data->shmid,IPC_RMID,NULL); gv(shmid) = 0; }
      Comm_sync_smp(comm);
    # else
      new_data->handle = gv(smp_data_id)++;
      new_data->size   = size;
      new_data->next   = NULL;
      new_data->addr   = Malloc(size);
/*
         MWS: May 2010
     It appears above that systems without SysV memory are expected to
     allocate Process-replicated memory instead of Node-replicated, and
     get on with it.  If these are duplicated, at full size, as it appears,
     that's likely devastating for the system total memory usage.

     The parallel CCSD(T) on IBM Blue Gene/P got into a deadlock, but
     other systems with sockets or MPI seem to work if allowed to proceed.
     At this time, we kill off just the BG here...

*/
       # ifdef IBMBG
         fprintf(stdout,"DDI compiled w/o SysV operating system support.\n");
         fprintf(stdout,"IBM/BG parallel CCSD(T) cannot run w/o SysV.\n");
         Fatal_error(911);
       # endif
    # endif
      
      return new_data->handle;
   }
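
SMP_create, SMP_find, and SMP_destroy (Examples #2 and #12) manage node-shared scratch blocks by handle. A minimal lifecycle sketch, assuming the SysV build and that every process on the node makes the calls collectively:

   void smp_scratch_sketch(void) {
      int handle = SMP_create(4096*sizeof(double));   /* collective on the node */
      SMP_Data *data = (SMP_Data *) SMP_find(handle); /* look up by handle      */
      double *buf = (double *) data->addr;            /* node-shared storage    */
      buf[0] = 1.0;                                   /* ... use the block ...  */
      SMP_destroy(handle);                            /* detach and remove      */
   }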
Example #14
/* ------------------------------------- *\
   FORTRAN Wrapper for DDI_Masters_bcast
\* ------------------------------------- */
   void F77_BCast_node(int_f77* msgtag, char *type,void *buffer,int_f77 *len,int_f77 *root) {
      char t = *type;
      size_t size;

      if(t == 'i' || t == 'I')      size = (*len)*sizeof(int_f77);
      else if(t == 'f' || t == 'F') size = (*len)*sizeof(double);
      else {
         fprintf(stdout,"%s: Unknown type passed in DDI_BCast\n",DDI_Id());
         Fatal_error(911);
      }

      DDI_BCast_node(buffer,size,(int)*root);
   }
Example #15
   void ddi_comm_bcast_(int_f77 *mpicomm, char *type,void *buffer,int_f77 *len,int_f77 *root) {
      char t = *type;
      size_t size;

      if(t == 'i' || t == 'I')      size = (*len)*sizeof(int_f77);
      else if(t == 'f' || t == 'F') size = (*len)*sizeof(double);
      else {
         fprintf(stdout,"%s: Unknown type passed in DDI_BCast\n",DDI_Id());
         Fatal_error(911);
      }
      
      /* convert the Fortran communicator handle to a C MPI_Comm */
      MPI_Bcast(buffer,size,MPI_BYTE,(int)*root,MPI_Comm_f2c((MPI_Fint)*mpicomm));
   }
Example #16
/* ------------------------ *\
   System Wrapper for chdir
\* ------------------------ */
   int Chdir(const char *path) {
      int ret;
      if((ret=chdir(path)) < 0) {
         switch(errno) {
            case EFAULT:        fprintf(stdout,"Error: chdir(%s) failed (errno=EFAULT).\n",path);        break;
            case EACCES:        fprintf(stdout,"Error: chdir(%s) failed (errno=EACCES).\n",path);        break;
            case ENOTDIR:       fprintf(stdout,"Error: chdir(%s) failed (errno=ENOTDIR).\n",path);       break;
            case ENAMETOOLONG:  fprintf(stdout,"Error: chdir(%s) failed (errno=ENAMETOOLONG).\n",path);  break;
            default:            fprintf(stdout,"Error: chdir(%s) failed (errno=unknown).\n",path);       break;
         }
         Fatal_error(911);
      } 
      return ret;
   }
Example #17
 struct hostent *Gethostbyname(const char *name) {
    struct hostent *hp;
    if((hp = gethostbyname(name)) == NULL) {
       switch(h_errno) {
         case TRY_AGAIN:      fprintf(stdout," Warning: Gethostbyname returned TRY_AGAIN.\n");
                              fflush(stdout);  return Gethostbyname(name); break;
         case NO_DATA:        fprintf(stdout," Error: Gethostbyname(%s) returned NO_DATA.\n",name); break;
         case NO_RECOVERY:    fprintf(stdout," Error: Gethostbyname(%s) returned NO_RECOVERY.\n",name); break;
         case HOST_NOT_FOUND: fprintf(stdout," Error: Gethostbyname(%s) returned HOST_NOT_FOUND.\n",name); break;
         default:             fprintf(stdout," Error: Gethostbyname(%s) returned an unknown error (%s).\n",name,DDI_Id()); break;
       } 
       Fatal_error(911);
    }
    return hp;
 }
Example #18
void DDI_ARR_Element_select(DDI_ARR_Element *a, DDI_ARR_Element *b, int op) {
  if (a == NULL) {
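    /* note: 'a' is passed by value, so this allocation is invisible to the
       caller (and leaks); callers are expected to pass a non-NULL element */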
    a = (DDI_ARR_Element*)malloc(sizeof(DDI_ARR_Element));
    *a = *b;
  }
  else {
    switch(op) {
    case DDI_ARR_MIN:
      if (b->alpha < a->alpha) *a = *b;
      break;
    case DDI_ARR_MAX:
      if (b->alpha > a->alpha) *a = *b;
      break;
    default:
      Fatal_error(1000);
      break;
    }
  }
}
Example #19
void DDI_ARMCI_Acquire(const DDA_ARMCI_Index *index, int handle, int proc, int ltype, void **array, int *armci_proc_ptr) {
  char *buf = NULL;
  int semid = index[proc].semid;
  int commid = gv(dda_comm)[handle];
  const DDI_Comm *comm = (const DDI_Comm *) Comm_find(commid);
  int armci_proc = comm->global_pid[proc];
  
# if defined DDI_MAX_DEBUG
  if(ltype != DDI_READ_ACCESS && ltype != DDI_WRITE_ACCESS) {
    fprintf(stdout,"%s: Invalid lock type requested.\n",DDI_Id());
    Fatal_error(911);
  }
# endif

  buf = (char*)gv(armci_mem_addr)[armci_proc];
  buf += index[proc].offset;
  *array  = (void*)buf;

#if defined DDI_ARMCI_LOCK
  DDI_ARMCI_Lock(semid,proc);
#endif

  *armci_proc_ptr = armci_proc;
}
Example #20
/* -------------------------------------------------------------------- *\
   DDI_Create_custom(idim,jdim,jcols,handle)
   =========================================
   [IN]  idim   - Number of rows in the array to be created.
   [IN]  jdim   - Number of columns in the array to be created.
   [IN]  jcols  - Array holding the number of columns to be given to
                - each processor when creating the distributed array.
   [OUT] handle - Handle given to the newly created array.
   
   Creates a distributed array where the user can customize how the
   array is distributed across the processors.
\* -------------------------------------------------------------------- */
   void DDI_Create_custom(int idim,int jdim,int *jcols,int *handle) {
   
      int i,np,me,nn,my;
      int inode;
      DDI_INT64 totwrds;
      DDI_INT64 longrows,longcols,longslice,longnd,long2g;
    # ifndef USE_SYSV
      int remote_id;
    # endif
      DDI_Patch patch;
      const DDI_Comm *comm = (const DDI_Comm *) Comm_find(DDI_WORKING_COMM);
      
      np = comm->np;
      me = comm->me;
      nn = comm->nn;
      my = comm->my;

      Comm_sync(3001,comm);

      /* find an unused handle */
      for (i=0; i<gv(nxtdda); ++i) {
        if (gv(ddacomm)[i] == DDI_COMM_NULL) break;
      }
      if (i==gv(nxtdda)) ++gv(nxtdda);
      *handle = i;
     
    # ifndef USE_SYSV
      remote_id = my;
    # endif

      DEBUG_ROOT(LVL2,(stdout," DDI: Entering DDI_Create_custom.\n"))
      DEBUG_ROOT(LVL2,(stdout," DDI: Creating Array [%i] - %ix%i=%i.\n",*handle,idim,jdim,idim*jdim))
      DEBUG_OUT(LVL3,(stdout,"%s: Entering DDI_Create_custom.\n",DDI_Id()))

    # ifdef DS_SIGNAL
      if(comm->me_local == 1) {
         signal(SIGALRM,DS_Thread_main);
      }
    # endif
      
      if(me == 0) {
         if(gv(dda_output)) {
            longrows = idim;
            longcols = jdim;
            totwrds = longrows*longcols;
            fprintf(stdout," DDI: Creating Array [%i] - %i x %i = %li words.\n",
                                    *handle,idim,jdim,totwrds);
            fflush(stdout);
         }
      }

   /*
       Make sure each slice of the distributed array will be under 2 GWords.

       Even on 64-bit hardware, most counting in this program is done
       with 32-bit data types, meaning we can't count higher than 2**31-1.

       If on 32-bit hardware, the 'long' data types here will be 32-bits,
       and so we'll see crazy products, including less than zero.
       In present form, nothing will be trapped here on a 32 bit machine!
   */
      longrows  = idim;
      longcols  = jdim;
      totwrds   = longrows*longcols;
   /*     Total distributed array over 2 Gwords is OK, but each  */
   /*     slice (MEMDDI per data server) must be under 2 GWords. */
   /*     TCP/IP has gv(nd)=-1 (uninitialized)                   */
   /*     Cray on one node has gv(nd)=0 since no d.s. exists.    */
      # if defined DDI_MPI
         longnd    = gv(nd);
         if (longnd <= 0) longnd=1;
      # endif
      # if defined DDI_SOC
         longnd = np;
      # endif
      longslice = totwrds/longnd;
   /*  next is just under the largest signed 32-bit integer, stored as a 64-bit quantity  */
      long2g   = 2147483643;
      if (longslice > long2g)
         {
            fprintf(stdout,"\n");
            fprintf(stdout," DDI: trouble creating distributed array!\n");
            fprintf(stdout," Current number of data servers is %li\n",longnd);
            fprintf(stdout," so each data server's slice of array");
            fprintf(stdout," [%i] is %li words\n",*handle,longslice);
            fprintf(stdout,"\n");
            fprintf(stdout," Add more processors so required total array");
            fprintf(stdout," size %li words\n",totwrds);
            fprintf(stdout," divided by no. of processors (data servers)");
            fprintf(stdout," is less than 2 Gwords= %li\n",long2g);
            fprintf(stdout," For example, %li or more data servers...\n",
                                1+totwrds/long2g);
            fprintf(stdout,"\n");
            fflush(stdout);
            Fatal_error(911);
         }

   /* ------------------------------------ *\
      Ensure 'jcols' is properly formatted
   \* ------------------------------------ */
      for(i=0; i<np; i++) {
         if(jcols[i] < 0 && me == 0) {
            fprintf(stdout," Error in argument 3 of DDI_Create_custom: Values must be >= 0.\n");
            Fatal_error(911);
         }
         
         if(i > 0)
         if(jcols[i] < jcols[i-1]) {
            fprintf(stdout," Error in argument 3 of DDI_Create_custom: Values must increase monotonically.\n");
            Fatal_error(911);
         }
      }
   
   /* ----------------------------------------------------------------- *\
      Check to ensure the maximum number of arrays hasn't been reached.
   \* ----------------------------------------------------------------- */
      if( gv(nxtdda) == MAX_DD_ARRAYS ) {
        if(me == 0) {
           fprintf(stderr," DDI Error:  The maximum number of distributed arrays [%i] has been reached.\n",MAX_DD_ARRAYS);
           fprintf(stderr," Information:  The maximum number of distributed arrays is a DDI compile-time option.\n");
        }
        Fatal_error(911);
      }

      gv(nrow)[*handle] = idim;
      gv(ncol)[*handle] = jdim;
      gv(ddacomm)[*handle]=gv(ddi_working_comm);
      
 
   /* ---------------------------------------------------- *\
      Generate Column Mapping by Compute Process & by Node
   \* ---------------------------------------------------- */
      for(i=0,inode=-1; i<np; i++) {
        gv(pcmap)[*handle][i] = jcols[i];

     /* if(inode == gv(ddiprocs)[i].node) continue; */
        if(inode == comm->local_nid[i]) continue;
        gv(ncmap)[*handle][++inode] = gv(pcmap)[*handle][i];
      }

      gv(pcmap)[*handle][np] = jdim;
      gv(ncmap)[*handle][nn] = jdim;


   /* -------------------------- *\
      Get local patch dimensions
   \* -------------------------- */
      DDI_DistribP(*handle,me,&patch);
 
   /* ----------------------------- *\
      Create Distributed Data Array
   \* ----------------------------- */
      patch.handle = *handle;
# if defined WINTEL
      patch.oper   = DDI_CREATE_OP;
# else
      patch.oper   = DDI_CREATE;
# endif
      patch.size   = jdim;


# if defined USE_SYSV || defined DDI_ARMCI || defined DDI_MPI2
      DDI_Index_create(&patch);
# else
      DDI_Send_request(&patch,&remote_id,NULL);
# endif 


   /* ----------------------------- *\
      Synchronize Compute Processes
   \* ----------------------------- */
      Comm_sync(3002,comm);

      DEBUG_OUT(LVL3,(stdout,"%s: Leaving DDI_Create_custom.\n",DDI_Id()))
   }
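
The argument checks above require jcols to be monotonically non-decreasing, and the column-map bookkeeping stores jcols[i] as process i's entry with jdim appended as the final boundary, which reads as starting-column offsets rather than the per-process counts the header comment describes. A hedged sketch of an even column split under that reading (the wrapper name is illustrative):

   void create_even_array(int idim,int jdim,int np) {
      int i,handle;
      int *jcols = (int *) Malloc(np*sizeof(int));
      for(i=0; i<np; i++) jcols[i] = (int)(((long)jdim*i)/np);  /* starting column of process i */
      DDI_Create_custom(idim,jdim,jcols,&handle);
      free(jcols);
   }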
Example #21
   void Finalize_tasks(int *sockets,int nsocks) {
      char ack=57;
      int i,rank,maxfd,opensockets=nsocks;
      fd_set recvlist;
  
      while(opensockets) {

      /* ------------------------------------ *\
         Build an fd_set for the open sockets
      \* ------------------------------------ */
         maxfd = 0;
         FD_ZERO(&recvlist);
         for(i=0; i<nsocks; i++) {
            if(sockets[i] < 0) continue;
            maxfd = max(maxfd,sockets[i]);
            FD_SET(sockets[i],&recvlist);
         }
         maxfd++;
 
      
      /* ----------------------------- *\
         Wait for an incoming message
      \* ----------------------------- */
         select(maxfd,&recvlist,NULL,NULL,NULL);


      /* -------------------------------------- *\
         Message(s) received from a DDI process
      \* -------------------------------------- */
         for(i=0; i<nsocks; i++) {
            if(sockets[i] < 0) continue;
            if(FD_ISSET(sockets[i],&recvlist)) { 

            /* ------------------------------------------ *\
               socket[i] is readable ==> incoming message
            \* ------------------------------------------ */
               if( recv(sockets[i],&rank,sizeof(int),0) == 0 ) {

               /* ------------------------------------------------------------- *\
                  EOF received over socket ==> Unexpected closure of the socket
               \* ------------------------------------------------------------- */
                  fprintf(stdout," ddikick.x: application process %i quit unexpectedly.\n",i);
                  sockets[i] = -1;
                  Fatal_error(911);

               } else {
        
               /* ----------------------------------- *\
                  Proper termination message received
               \* ----------------------------------- */
                  send(sockets[i],&ack,1,0);
                  Recv(sockets[i],&ack,1,0);
                  FD_CLR(sockets[i],&recvlist);
                  sockets[i] = -1;
                  --opensockets;

                  MAX_DEBUG((stdout," ddikick.x: %i terminated properly.\n",i))
      }  }  }  }

      return;
   }
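
The loop above shows the standard select() idiom: because select() overwrites the fd_set with the set of ready descriptors, the set and maxfd must be rebuilt on every iteration. A self-contained sketch of the same pattern (POSIX sockets; closed slots marked with -1):

   #include <sys/select.h>

   /* block until one of the n descriptors in fds[] is readable;
      returns that descriptor, or -1 if none remain open */
   int wait_readable(int *fds,int n) {
      int i,maxfd = -1;
      fd_set readlist;
      FD_ZERO(&readlist);
      for(i=0; i<n; i++) {
         if(fds[i] < 0) continue;
         FD_SET(fds[i],&readlist);
         if(fds[i] > maxfd) maxfd = fds[i];
      }
      if(maxfd < 0) return -1;
      select(maxfd+1,&readlist,NULL,NULL,NULL);
      for(i=0; i<n; i++)
         if(fds[i] >= 0 && FD_ISSET(fds[i],&readlist)) return fds[i];
      return -1;
   }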
Example #22
/* ---------------------------------------------------------- *\
   DDI_GDLBNext(counter)
   =====================
   [OUT] counter - value of the load balance counter returned
                   to the calling process.

   An atomic operation that sets the value of counter to the
   value of the global load-balance counter, then increments
   the global counter.
\* --------------------------------------------------------- */
   void DDI_GDLBNext(size_t *counter) {
      int np,me,nn,my,tmp_scope,remote_id=0;
      DDI_Patch Patch;
      
    # if defined DDI_LAPI
      lapi_cntr_t org_cntr;
      uint tgt          = gv(lapi_map)[0];
      int *tgt_var      = gv(lapi_gdlb_cntr_addr)[tgt];
      int  in_val       = 1;
      int  prev_tgt_val = -1;
    # endif

# if defined DDI_ARMCI
      DDI_ARMCI_GDLBNext(counter);
      return;
# endif
    
      DDI_NProc(&np,&me);

      if(me == 0) {
      /* ---------------------------------- *\
         We need to work in the world scope
      \* ---------------------------------- */
         tmp_scope = DDI_WORKING_COMM;
         gv(ddi_working_comm) = DDI_COMM_WORLD;
   
         DDI_NProc(&np,&me);
         DDI_NNode(&nn,&my);

       # if FULL_SMP
       # if defined DDI_LAPI
         if(LAPI_Setcntr(gv(lapi_hnd),&org_cntr,0) != LAPI_SUCCESS) {
            fprintf(stdout,"%s: LAPI_Setcntr failed in DDI_GDLBNext.\n",DDI_Id());
            Fatal_error(911);
         }
         
         if(LAPI_Rmw(gv(lapi_hnd),FETCH_AND_ADD,tgt,tgt_var,&in_val,
                       &prev_tgt_val,&org_cntr) != LAPI_SUCCESS) {
            fprintf(stdout,"%s: LAPI_Rmw failed in DDI_GDLBNext.\n",DDI_Id());
            Fatal_error(911);
         }
         
         if(LAPI_Waitcntr(gv(lapi_hnd),&org_cntr,1,NULL) != LAPI_SUCCESS) {
            fprintf(stdout,"%s: LAPI_Waitcntr failed in DDI_GDLBNext.\n",DDI_Id());
            Fatal_error(911);
         }
         
         if(prev_tgt_val == -1) {
            fprintf(stdout,"%s: LAPI version of DDI_GDLBNext is not working correctly.\n",DDI_Id());
            Fatal_error(911);
         } else {
            *counter = (size_t) prev_tgt_val;
         }
       # else
         if(my == 0) {
            DDI_GDLBNext_local(counter);
         } else {
            Patch.oper = DDI_GDLBNEXT;
            DDI_Send_request(&Patch,&remote_id,NULL);
            DDI_Recv(counter,sizeof(size_t),remote_id);
         }
       # endif
       # else
         Patch.oper = DDI_GDLBNEXT;
         DDI_Send_request(&Patch,&remote_id,NULL);
         DDI_Recv(counter,sizeof(size_t),remote_id);
       # endif
   
      /* --------------------------- *\
         Return to the working scope
      \* --------------------------- */
         gv(ddi_working_comm) = tmp_scope;
      }

      DDI_BCast(counter,sizeof(size_t),0);
   }
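
The typical consumer of DDI_GDLBNext is a dynamic load-balancing loop in which every compute process keeps fetching the next global task index until the work runs out. A minimal sketch, assuming ntasks and the worker do_task() are supplied by the caller (both are illustrative):

   void dlb_loop_sketch(size_t ntasks) {
      size_t task;
      DDI_GDLBReset();           /* collective reset, see Example #8    */
      for(;;) {
         DDI_GDLBNext(&task);    /* atomic fetch-and-increment          */
         if(task >= ntasks) break;
         do_task(task);          /* hypothetical worker for task 'task' */
      }
   }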
Example #23
   void Comm_create(int np,int *ids, int ngroups, int mygroup, int comm_id, int *new_comm_id) {

      int i,ip,in,ismp,nn,nid,np_local,tmp;
      int err;
      size_t size;

      const DDI_Comm *cur_comm   = (DDI_Comm *) Comm_find(comm_id);
      const DDI_Comm *comm_world = &gv(ddi_base_comm);
      DDI_Comm *new_comm = (DDI_Comm *) Malloc(sizeof(DDI_Comm));
      DDI_Comm *end_comm = (DDI_Comm *) Comm_find_end();

      DEBUG_ROOT(LVL1,(stdout," DDI: Entering DDI_Create_comm.\n")) 
      DEBUG_OUT(LVL2,(stdout,"%s: Entering DDI_Create_comm.\n",DDI_Id()))

      Comm_sync(123,cur_comm);

   /* ------------------------------- *\
      Add new_comm to the linked list
   \* ------------------------------- */
      new_comm->next = NULL;
      end_comm->next = (void *) new_comm;

   /* new_data->next = NULL; */
   /* end_data->next = (void *) new_data; */

      new_comm->ngroups = ngroups;
      new_comm->mygroup = mygroup;

      new_comm->id = *new_comm_id = gv(ddi_comm_id)++;
      new_comm->local_nid  = (int *) Malloc(np*sizeof(int));
      new_comm->global_pid = (int *) Malloc(np*sizeof(int));
      new_comm->global_nid = (int *) Malloc(np*sizeof(int));

      i = 0;
      if(np > 1) {
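         /* gnome-style sort: walk forward, swap out-of-order neighbors,
            then step back one slot to re-check the previous pair; leaves
            'ids' in ascending order */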
         do {
	     
            if(ids[i] >= cur_comm->np) {
               fprintf(stdout,"%s: Invalid id list in DDI_Comm_create.\n",DDI_Id());
               Fatal_error(911);
            }
   
            if(ids[i+1] < ids[i]) {
               tmp      = ids[i];
               ids[i]   = ids[i+1];
               ids[i+1] = tmp;
               if(i) i--; i--;
            }
   
         } while(++i < np-1);
      }

      Comm_sync(126,cur_comm);

      nn  = -1;
      nid = -1;
      np_local = 0;

      for(i=0; i<np; i++) {
         new_comm->global_pid[i] = cur_comm->global_pid[ids[i]];
         new_comm->global_nid[i] = cur_comm->global_nid[ids[i]];
         fflush(stdout);

         if(new_comm->global_nid[i] != nid) {
            nid = new_comm->global_nid[i];
            nn++;
         }

         new_comm->local_nid[i] = nn;
         
         if(nid == comm_world->my) np_local++;
         
      }

      nn++;
/*
      fprintf(stdout,"%s: new_comm->nn = %i.\n",DDI_Id(),nn);
      fprintf(stdout,"%s: new_comm->np_local = %i.\n",DDI_Id(),np_local);
*/
      Comm_sync(127,cur_comm);
      DEBUG_ROOT(LVL5,(stdout," comm_create - global_pid/global_nid formed.\n"))

      new_comm->smp_pid     = (int *) Malloc(np_local*sizeof(int));
      new_comm->node_master = (int *) Malloc(nn*sizeof(int));
      new_comm->global_dsid = (int *) Malloc(nn*sizeof(int));

      for(ip=0,in=-1,ismp=0,nid=-1; ip<np; ip++) {

         if(new_comm->global_nid[ip] != nid) {
            in++;
            nid = new_comm->global_nid[ip];
            new_comm->global_dsid[in] = comm_world->global_dsid[nid];
            new_comm->node_master[in] = new_comm->global_pid[ip];
         }

         if(new_comm->global_pid[ip] == comm_world->me) {
            new_comm->me = ip;
            new_comm->my = in;
         }
         
         if(nid == comm_world->my) {
            if(new_comm->global_pid[ip] == comm_world->me) new_comm->me_local = ismp;
            new_comm->smp_pid[ismp++] = new_comm->global_pid[ip];
         }
         
      }

      new_comm->nn = nn;
      new_comm->np = np;
      new_comm->np_local = ismp;

      DEBUG_OUT(LVL5,(stdout,"%s: np=%i, nn=%i, np_smp=%i, me=%i, my=%i, me_smp=%i.\n",
		      DDI_Id(),new_comm->np,new_comm->nn,new_comm->np_local,
		      new_comm->me,new_comm->my,new_comm->me_local));
      
    # if defined DDI_MPI
      new_comm->world_comm = cur_comm->world_comm;
      MPI_Comm_split(cur_comm->smp_comm,new_comm->global_pid[0],new_comm->me_local,&new_comm->smp_comm);
      MPI_Comm_split(cur_comm->compute_comm,new_comm->global_pid[0],new_comm->me,&new_comm->compute_comm);
      MPI_Comm_split(new_comm->compute_comm,new_comm->me_local,new_comm->my,&new_comm->node_comm);
    # endif
      
      DEBUG_OUT(LVL3,(stdout,"%s: Exiting DDI_Comm_create.\n",DDI_Id()))

   }
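
The MPI_Comm_split calls above carve the SMP, compute, and node communicators out of the parent by coloring ranks. On an MPI-3 implementation the node-level split can be expressed directly; a self-contained sketch of that alternative (not what the MPI-1-era code above does):

   #include <mpi.h>

   /* one communicator per shared-memory node, plus a masters-only comm */
   void node_comms_sketch(MPI_Comm *smp_comm,MPI_Comm *masters_comm) {
      int me_local;
      MPI_Comm_split_type(MPI_COMM_WORLD,MPI_COMM_TYPE_SHARED,0,
                          MPI_INFO_NULL,smp_comm);
      MPI_Comm_rank(*smp_comm,&me_local);
      /* node masters get color 0; everyone else opts out */
      MPI_Comm_split(MPI_COMM_WORLD,me_local==0 ? 0 : MPI_UNDEFINED,
                     0,masters_comm);
   }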
Example #24
   void DDI_Send_request_comm(void *buff,int *to,DDI_Request *req,int commid) {
      char ack;
      size_t size = sizeof(DDI_Patch);
      int i,np,me,nn,my;

      const DDI_Comm *comm = (const DDI_Comm *) Comm_find(commid);
    
      np = comm->np;
      me = comm->me;
      nn = comm->nn;
      my = comm->my; 

      *to = comm->global_dsid[*to];
      DEBUG_OUT(LVL3,(stdout,"%s: sending request to global process %i.\n",DDI_Id(),*to))

   /* ------------------------------------------------------------ *\
      Using TCP/IP sockets, this is always a synchronous operation
   \* ------------------------------------------------------------ */
    # if defined DDI_SOC
      Send(gv(sockets)[*to],buff,size,0);
      Recv(gv(sockets)[*to],&ack,1,0);
    # endif


   /* -------------------------------------------------------------- *\
      Using LAPI, this sends an active message to the target process
      causing an interrupt signal to be issued.  The target process
      now acts like a data server and handles the data request. This
      call is non-blocking: once the active message is sent, the
      target process is in control of the data, and the originating
      compute process only needs to wait until all the target
      processes have finished and tell 'this' originating process
      that it can continue.  The details differ slightly for get,
      put, and accumulate.
   \* -------------------------------------------------------------- */
    # if defined DDI_LAPI
      DDI_Patch *patch = (DDI_Patch *) buff;
      uint tgt = gv(lapi_map)[*to];
      void *hdr_hndlr = (void *) gv(am_hndlr)[tgt];
      void *udata = NULL;
      ulong udata_len = 0;
      lapi_cntr_t *org_cntr = (lapi_cntr_t *) patch->cp_lapi_cntr;
      lapi_cntr_t *tgt_cntr = NULL;
      lapi_cntr_t *cmpl_cntr = NULL;
   
      if(LAPI_Amsend(gv(lapi_hnd),tgt,hdr_hndlr,buff,size,udata,udata_len,
                     tgt_cntr,org_cntr,cmpl_cntr) != LAPI_SUCCESS) {
          fprintf(stdout,"%s: lapi_amsend error in ddi_send_request.\n",DDI_Id());
          Fatal_error(911);
      }
    # endif


   /* ---------------------------------- *\
      The stand-alone MPI version of DDI
   \* ---------------------------------- */
    # if defined CRAY_MPI
      if(req == NULL) {
         MPI_Send(buff,size,MPI_BYTE,*to,37,comm->world_comm);
      } else {
         MPI_Isend(buff,size,MPI_BYTE,*to,37,comm->world_comm,req);
      }
      return;
    # endif
    # if defined DDI_MPI && !defined DDI_SOC && !defined DDI_LAPI
      DEBUG_OUT(LVL3,(stdout,"%s: calling mpi_ssend.\n",DDI_Id()))
      MPI_Ssend(buff,size,MPI_BYTE,*to,0,comm->world_comm);
    # endif


   /* ------------------------------------------------------------- *\
      Reverse look up the group rank of the global rank to which to
      the data request is being sent.
   \* ------------------------------------------------------------- */
   /*
      if(gv(scope) == DDI_GROUP) {
         for(i=0; i<np; i++) if(DDI_Id_global_proc(i+np) == *to) *to = i+np;
      }
   */

      DEBUG_OUT(LVL3,(stdout,"%s: leaving ddi_send_request.\n",DDI_Id()))

   }
Example #25
   static void Init_mpi(int targc,char *targv[]) {

    # ifndef HOSTNAME_LEN
    # define HOSTNAME_LEN  96
    # endif

      int argc = targc;
      char **argv = targv;
      int i,j,np,me,nc,nd,ndpn;
      int np_local,me_local;
      int nnodes,mynode,master;
      int icp,ids,cpus,myds,ext;
      int *ranks,*disp,*world;
      int *ranks_local;

      int me_mpi,me_ddi,rbn;

      MPI_Group Comm_World_grp;
      MPI_Group SMP_World_grp;
      MPI_Group SMP_Compute_grp;
      MPI_Group DDI_World_grp;
      MPI_Group DDI_Compute_grp;

      MPI_Comm SMP_World_comm;
      MPI_Comm SMP_Compute_comm;
      MPI_Comm SMP_Masters_comm;

      MPI_Comm DDI_World_comm;
      MPI_Comm DDI_Compute_comm;

      char hostname[HOSTNAME_LEN],*c,*hostnames;

      DDI_Comm *comm = (DDI_Comm *) &gv(ddi_base_comm);
      int threadLevel;

 # ifdef WINDOWS
   /* ------------------------------ *\
      Initialize Windows Sockets 2.2
   \* ------------------------------ */
      WORD wVersionRequested;
      WSADATA wsaData;
      wVersionRequested = MAKEWORD(2, 2);
      WSAStartup(wVersionRequested, &wsaData);      
 # endif

   /* -------------- *\
      Initialize MPI
   \* -------------- */
      if(MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &threadLevel) != MPI_SUCCESS) {
         fprintf(stdout," DDI: MPI_Init failed.\n");
         fflush(stdout); exit(911);
      }

   /* -------------------------------- *\
    * Initialize DDI working directory
   \* -------------------------------- */
      Init_scratch(argc,argv);


   /* ------------------------------------------ *\
      Determine Rank and Number of MPI Processes
   \* ------------------------------------------ */
      MPI_Comm_size(MPI_COMM_WORLD,&np);
      MPI_Comm_rank(MPI_COMM_WORLD,&me);


   /* -------------------------------------- *\
      For debugging purposes, set gv(myproc)
   \* -------------------------------------- */
      comm->me = me;
      DEBUG_ROOT(LVL1,(stdout," DDI: MPI initialized.  %i MPI processes.\n",np))


   /* ---------------------------------------------------- *\
      MPI-1 requires data servers unless it is using LAPI.
      MPI-2 does not require data servers at all.
      ----------------------------------------------------
      nc = 0  ==> standard data server model (cp:ds::1:1).
      nc = np ==> specialized model such as LAPI || MPI-2.
   \* ---------------------------------------------------- */
      nc = 0;
    # if defined DDI_LAPI || defined DDI_MPI2 || defined CRAY_MPI
      nc = np;
    # endif


   /* ------------------------------------------ *\
      Standard MPI-1 model (nc=0) ==> cp:ds::1:1
   \* ------------------------------------------ */
      if(nc == 0) {
         if((np % 2) && (me == 0)) {
            fprintf(stdout," Error: Expecting an even number of MPI processes (cp:ds::1:1).\n");
            Fatal_error(911);
         }
         
         nc = nd = np/2;
      }


   /* ------------------------------------------------ *\
      MPI-2 or MPI-1/LAPI model (nc=np) ==> cp:ds::1:0
   \* ------------------------------------------------ */
      if(nc == np) nd = 0;
      
      
   /* ------------------------------------------------------------- *\
      Check to make sure the job complies with compile time limits.
   \* ------------------------------------------------------------- */
      if(nc > MAX_PROCESSORS) {
         
         if(me == 0) {
            fprintf(stdout," DDI: \"Houston, we have a problem.\"\n");
            fprintf(stdout," DDI: MAX_NODES = %i\n",MAX_NODES);
            fprintf(stdout," DDI: MAX_SMP_PROCS = %i\n",MAX_SMP_PROCS);
            fprintf(stdout," DDI: MAX_PROCESSORS = MAX_NODES * MAX_SMP_PROCS = %i\n",MAX_PROCESSORS);
            fprintf(stdout," DDI: MPI reports %i processes ==> %i processors.\n",np,nc);
            fprintf(stdout," DDI: Please correct the limits and recompile DDI.\n");
            fflush(stdout);
         }
         
         MPI_Barrier(MPI_COMM_WORLD);
         MPI_Finalize();
         exit(0);
      }
      
      
   /* ------------------------------------------------------------------- *\
      Non-Standard MPI-1 Model (nc < np && ((nc | np) || (np-nc | np)))
      Can be used to vary the number of data server per node by assigning
      a number of data servers each compute process or a number of data
      server per node.  This code has not been implemented.
   \* ------------------------------------------------------------------- */
      if(nc != nd && nc != np) {
         fprintf(stdout," DDI: This should never have been executed.\n");
         Fatal_error(911);
      }


   /* ---------------------------------- *\
      System command to get the hostname
   \* ---------------------------------- */
      gethostname(hostname,HOSTNAME_LEN);
      DEBUG_OUT(LVL4,(stdout," MPI Process %i: hostname=%s\n",me,hostname))


   /* -------------------------------------------- *\
      Gather all the hostnames into a single array
   \* -------------------------------------------- */
      hostnames = (char *) Malloc(np*HOSTNAME_LEN);
      MPI_Allgather(hostname, HOSTNAME_LEN,MPI_BYTE,
                    hostnames,HOSTNAME_LEN,MPI_BYTE,MPI_COMM_WORLD);


   /* -------------------------------------- *\
      Determine all MPI Process on "my" node
   \* -------------------------------------- */
      ranks = (int *) Malloc(np*sizeof(int));
      for(i=0,np_local=0,c=hostnames; i<np; i++,c+=HOSTNAME_LEN) {
         if(strcmp(hostname,c) == 0) ranks[np_local++] = i;
      }
      DEBUG_OUT(LVL4,(stdout," MPI Process %i: %i local MPI processes.\n",me,np_local))

      ranks_local = (int *) Malloc(np_local*sizeof(int));
      memcpy(ranks_local,ranks,np_local*sizeof(int));


   /* ----------------------------- *\
      Create SMP_World communicator
   \* ----------------------------- */
      MPI_Comm_group(MPI_COMM_WORLD,&Comm_World_grp);
      MPI_Group_incl(Comm_World_grp,np_local,ranks_local,&SMP_World_grp);
      MPI_Comm_create(MPI_COMM_WORLD,SMP_World_grp,&SMP_World_comm);

      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_ROOT(LVL3,(stdout," DDI: SMP_World_comm created.\n"))

   /* ------------------------------ *\
      Create SMP_Master communicator
   \* ------------------------------ */
      MPI_Comm_rank(SMP_World_comm,&me_local);

      master = 0;
      if(me_local == 0) master = 1;

      MPI_Comm_split(MPI_COMM_WORLD,master,0,&SMP_Masters_comm);

      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_ROOT(LVL3,(stdout," DDI: SMP_Master_comm created.\n"))

   /* --------------------------------------------------------------------------- *\
      Create Compute_comm and World_comm communicators
      ================================================
      First gather the node information, then sort that information by node (not
      guaranteed to be sorted).  Next assign compute processes and data servers
      (if they exist), and finally create the communicators.
   \* --------------------------------------------------------------------------- */
      MPI_Comm_size(SMP_Masters_comm,&nnodes);
      MPI_Comm_rank(SMP_Masters_comm,&mynode);
      MPI_Bcast(&nnodes,1,MPI_INT,0,SMP_World_comm);
      MPI_Bcast(&mynode,1,MPI_INT,0,SMP_World_comm);
      
      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_ROOT(LVL3,(stdout," DDI: There are %i nodes.\n",nnodes))

   /* --------------------------------------- *\
      Check compile-time limits for MAX_NODES
   \* --------------------------------------- */
      if(nnodes > MAX_NODES) {
      
         if(me == 0) {
            fprintf(stdout," DDI: MAX_NODES = %i\n",MAX_NODES);
            fprintf(stdout," DDI: MPI topology suggests %i nodes.\n",nnodes);
            fprintf(stdout," DDI: Increase MAX_NODES and recompile DDI.\n");
            fflush(stdout);
         }
         
         MPI_Barrier(MPI_COMM_WORLD);
         MPI_Finalize();
         exit(0);
      }


   /* ----------------------- *\
      Gather node information
   \* ----------------------- */
      np_by_node = (int *) Malloc(nnodes*sizeof(int));
      ranks_by_node = (int **) Malloc(nnodes*sizeof(int*));

      if(me_local == 0) {
         DEBUG_OUT(LVL4,(stdout," MPI Process %i: Node %i master.\n",me,mynode))
	      
         MPI_Allgather(&np_local,1,MPI_INT,np_by_node,1,MPI_INT,SMP_Masters_comm);

         for(i=0,j=0; i<nnodes; i++) j += np_by_node[i];
         if(j != np) {
            fprintf(stdout,"ddi_init: got j= %i, expected np= %i\n",j,np);
            fprintf(stdout," DDI Error: Sum of PPN over all nodes != NP\n");
            Fatal_error(911);
         }

         disp = (int *) Malloc(nnodes*sizeof(int));
         for(i=1,disp[0]=0; i<nnodes; i++) disp[i] = disp[i-1] + np_by_node[i-1];

         MPI_Allgatherv(ranks_local,np_local,MPI_INT,ranks,np_by_node,disp,MPI_INT,
                        SMP_Masters_comm);
         free(disp);
      }

      MPI_Bcast(np_by_node,nnodes,MPI_INT,0,SMP_World_comm);
      MPI_Bcast(ranks,np,MPI_INT,0,SMP_World_comm);

      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_ROOT(LVL3,(stdout," DDI: Node topology determined.\n"))

      ranks_by_node[0] = ranks;
      for(i=1; i<nnodes; i++) ranks_by_node[i] = (ranks_by_node[i-1] + np_by_node[i-1]);


   /* --------------------------------------------------------------------------- *\
      Each MPI process has a list of MPI ranks sorted by node.  The list of ranks
      for a particular node is sorted from lowest to highest rank, where the rank
      corresponds to the value in MPI_COMM_WORLD communicator. Next determine the 
      number of compute processes/node.  Data servers/node can be inferred.
   \* --------------------------------------------------------------------------- */
      nc_by_node = (int *) Malloc(nnodes*sizeof(int));
      nd_by_node = (int *) Malloc(nnodes*sizeof(int));

      if(nc == nd) {

      /* ------------------------------------------------------------- *\
         There are a given number of data servers per compute process.
         For now the ratio must be 1:1; CP:DS::1:N is not implemented (yet).
      \* ------------------------------------------------------------- */
         j = nd/nc + 1;  /* j represents the number of MPI processes per compute process */

         for(i=0; i<nnodes; i++) {

            if((np_by_node[i] % j)) {
               fprintf(stdout," DDI: For every CP requested there should be %i MPI processes.\n",j);
               fprintf(stdout," DDI Error: np on node %i is not divisible by %i.\n",i,j);
               Fatal_error(911);
            }

            nc_by_node[i] = np_by_node[i] / j;
            nd_by_node[i] = np_by_node[i] - nc_by_node[i];
         }

      }
      
      
      if(nc == np) {
      
       # if defined CRAY_MPI
      /* ------------------------------------------------------------- *\
         The environmental variable DDI_DS_PER_NODE is used to control
         the number of MPI processes that become data servers.
      \* ------------------------------------------------------------- */
         if(me == 0) {
           if(getenv("DDI_DS_PER_NODE")) {
             ndpn = atoi(getenv("DDI_DS_PER_NODE"));
           } else {
             ndpn = 1;
           }
           if(nnodes == 1) ndpn = 0;
           fprintf(stdout,"MPI is using %i data servers/node. (DDI_DS_PER_NODE)\n",ndpn);
         }
         MPI_Bcast(&ndpn,1,MPI_INT,0,MPI_COMM_WORLD);

      /* -------------------------------------------------------- *\
         If DDI_DS_PER_NODE is invalid, then shutdown gracefully.
      \* -------------------------------------------------------- */
         if(ndpn < 0 || ndpn > MAX_SMP_PROCS-1) {
           if(me == 0) {
             fprintf(stdout,"%s: DDI_DS_PER_NODE=%i is invalid.\n",
                  DDI_Id(),ndpn);
             fprintf(stdout,"%s: The value must between 0 and %i.\n",
                  DDI_Id(),MAX_SMP_PROCS-1);
             fflush(stdout);
             sleep(1);
           }
           MPI_Finalize();
         }

         nd = nnodes*ndpn;
         nc = np - nd;
       # endif


      /* --------------------------------------------- *\
         MPI-2 or MPI-1/LAPI model ==> no data servers
      \* --------------------------------------------- */
         for(i=0; i<nnodes; i++) {
             nc_by_node[i] = np_by_node[i];
             nd_by_node[i] = 0;

           # if defined CRAY_MPI
             nc_by_node[i] = np_by_node[i]-ndpn;
             nd_by_node[i] = ndpn;

          /* ------------------------------------------- *\
             Sanity check - Ensure >=1 CP exists per node
          \* ------------------------------------------- */
             if(nc_by_node[i] <= 0) {
               if(me == 0) {
                 fprintf(stdout,
                   " ERROR: There are no CPs assigned to node %i.\n",i);
                 fprintf(stdout,
                   " The total number of processes on node %i = %i.\n",
                   i,np_by_node[i]);
                 fprintf(stdout,
                   " Attempted to reserve %i processes as data servers.\n",
                   ndpn);
                 fflush(stdout);
                 sleep(1);
               }
               MPI_Finalize();
             }
           # endif
         }
         
      } 

      gv(np) = np;
      gv(nc) = nc;
      gv(nd) = nd;
      
      DEBUG_ROOT(LVL3,(stdout," DDI: There are %i DDI compute processes.\n",nc))
      DEBUG_ROOT(LVL3,(stdout," DDI: There are %i DDI data servers.\n",nd))

   /* -------------------------------------------------------------------- *\
      Create a list of ranks that will eventually become the communicators
   \* -------------------------------------------------------------------- */
      world = (int *) Malloc(np*sizeof(int));

      for(i=0,icp=0,ids=nc; i<nnodes; i++) {
         for(j=0; j<np_by_node[i]; j++) {
            if(j<nc_by_node[i]) world[icp++] = ranks_by_node[i][j];
            else                world[ids++] = ranks_by_node[i][j];
         }
      }

      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_OUT(LVL4,(stdout," MPI Process %i: nc=%i; np=%i.\n",me,nc,np))


   /* ------------------------------------ *\
      Create DDI_Compute_comm communicator
   \* ------------------------------------ */
      MPI_Group_incl(Comm_World_grp,nc,world,&DDI_Compute_grp);
      MPI_Comm_create(MPI_COMM_WORLD,DDI_Compute_grp,&DDI_Compute_comm);


   /* ---------------------------------- *\
      Create DDI_World_comm communicator
   \* ---------------------------------- */
      MPI_Group_incl(Comm_World_grp,np,world,&DDI_World_grp);
      MPI_Comm_create(MPI_COMM_WORLD,DDI_World_grp,&DDI_World_comm);


   /* ------------------------------------ *\
      Create SMP_Compute_comm communicator
   \* ------------------------------------ */
      MPI_Group_intersection(DDI_Compute_grp,SMP_World_grp,&SMP_Compute_grp);
      MPI_Comm_create(MPI_COMM_WORLD,SMP_Compute_grp,&SMP_Compute_comm);

      DEBUG_ROOT(LVL3,(stdout," DDI: finished forming communicators.\n"))

   /* ------------------------------------ *\
      Finished creating MPI communicators.
      Initialize internal DDI structures.
   \* ------------------------------------ */
      MPI_Comm_rank(DDI_World_comm,&me);
      comm->np = nc;
      comm->me = me;
      comm->nn = nnodes;
      comm->my = mynode;

      MPI_Comm_rank(MPI_COMM_WORLD,&me_mpi); 
      MPI_Comm_rank(DDI_World_comm,&me_ddi); 

      DEBUG_OUT(LVL3,(stdout," MPI Process %i = DDI Process %i\n",me_mpi,me_ddi))
      
      comm->id           = DDI_COMM_WORLD;
      comm->smp_comm     = SMP_Compute_comm;
      comm->world_comm   = DDI_World_comm;
      comm->compute_comm = DDI_Compute_comm;
      comm->node_comm    = SMP_Masters_comm;
      comm->smp_world    = SMP_World_comm;

    # if !defined USE_SYSV 
      comm->nn = nc;
      comm->my = me;
      if(comm->my >= nc) comm->my -= nc;
      comm->smp_comm     = MPI_COMM_SELF;
      comm->node_comm    = DDI_Compute_comm;
    # endif


   /* -------------------------------------------------------------------- *\
      Check for network extension.  The extension would be appended to the
      hostname if it becomes necessary to form a TCP/IP socket to the host
   \* -------------------------------------------------------------------- */
    # ifdef DDI_SOC
      for(i=0,ext=0; i<argc && strcmp("-netext",argv[i]) != 0; i++);
      if(i != argc) ext = ++i;
    # endif


   /* ---------------------------------------------------------------- *\
      Scan through the list of hostnames and extract the node topology
   \* ---------------------------------------------------------------- */
      MPI_Allgather(hostname, HOSTNAME_LEN,MPI_BYTE,
                    hostnames,HOSTNAME_LEN,MPI_BYTE,DDI_World_comm);

      MPI_Allgather(&me,1,MPI_INT,ranks_local,1,MPI_INT,SMP_World_comm);
      if(me_local == 0) {
         disp = (int *) Malloc(nnodes*sizeof(int));
         for(i=1,disp[0]=0; i<nnodes; i++) disp[i] = disp[i-1] + np_by_node[i-1];
         MPI_Allgatherv(ranks_local,np_local,MPI_INT,ranks,np_by_node,disp,MPI_INT,
                        SMP_Masters_comm);
         free(disp);
      }
      MPI_Bcast(ranks,np,MPI_INT,0,SMP_World_comm);

      for(i=0; i<nnodes; i++) {

         cpus = nc_by_node[i];
         master = ranks_by_node[i][0];

      /* --------------------------------------------------------------- *\
         For each node, one data server is chosen from the all the data
         servers on that node in a round-robin manner based on the rank
         of the process.
      \* --------------------------------------------------------------- */
         if(nd_by_node[i]) myds = cpus + (me % nd_by_node[i]);
         else              myds = -1;
 
 
      /* --------------------------------------------------------------- *\
         Using LAPI or MPI-2, we have no data servers, but we still need
         to know which compute process to interrupt to get, put, or acc!
      \* --------------------------------------------------------------- */
       # if defined DDI_LAPI
         myds = (me % nc_by_node[i]);
       # endif 


      /* ------------------------------------------------------ *\
         Sanity check: myds must correspond to a rank on node i
      \* ------------------------------------------------------ */
      /*  1st bit of next line was 'i<nd', changed by Ryan to 'nd', May 2010 */
         if(nd && (myds < 0 || myds >= np_by_node[i])) {
           if(me == 0) {
             fprintf(stdout," ERROR: Unable to assign a DS for node %i.\n",i);
             fprintf(stdout," Please report this error to:\n");
             fprintf(stdout,"   [email protected] and/or\n");
             fprintf(stdout,"   [email protected]\n");
             fprintf(stdout," myds=%i; np_by_node[%i]=%i\n",
                      myds,i,np_by_node[i]);
             fflush(stdout);
           # if defined WINDOWS
             Sleep(1*1000);
           # else
             sleep(1);
           # endif
           }
           MPI_Finalize();
         }


      /* ----------------------------------------------------- *\
         For each remote node, assign a data server rank
      \* ----------------------------------------------------- */
         if(nd) gv(ddinodes)[i].myds       = ranks_by_node[i][myds];
         else   gv(ddinodes)[i].myds       = -1;

      /* --------------------------------- *\
         Save these values in gv(ddinodes)
      \* --------------------------------- */
         gv(ddinodes)[i].cpus       = cpus;
         gv(ddinodes)[i].nodemaster = master;


      /* ----------------------------------------------------------------- *\
         Dig up the hostname of the node and append any network extensions
      \* ----------------------------------------------------------------- */
       # ifdef DDI_SOC
         c = (hostnames + master*HOSTNAME_LEN);
         if(ext) strcat(c,argv[ext]);
       # endif


      /* ------------------------------------------------------------------- *\
         All DDI processes on the node share the same node rank and hostname
      \* ------------------------------------------------------------------- */
         for(j=0; j<np_by_node[i]; j++) {
            rbn = ranks_by_node[i][j];
            gv(ddiprocs)[rbn].node = i;

          # ifdef DDI_SOC
            gv(ddiprocs)[rbn].hostname = (char *) strdup(c);
          # endif

          # if !defined USE_SYSV
            gv(ddiprocs)[rbn].node = rbn;
            if(rbn >= comm->np) gv(ddiprocs)[rbn].node -= comm->np;
          # endif

         }

      }


   /* ------------------------- *\
      Free any Malloc'ed Memory
   \* ------------------------- */
      free(hostnames);
      free(world);
      free(ranks_local);



   /* ---------------------------- *\
      Do NOT free global variables
   \* ---------------------------- */
/* --- moved to ddi_finalize
      free(ranks);
      free(np_by_node);
      free(nc_by_node);
      free(nd_by_node);
      free(ranks_by_node);
*/


   /* ---------------------------------- *\
      Synchronize processes and continue
   \* ---------------------------------- */
      MPI_Barrier(MPI_COMM_WORLD);
      DEBUG_ROOT(LVL3,(stdout," DDI: Init_mpi finished.\n"))
   }
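
Taken on its own, the round-robin data-server choice above is a one-line computation. A minimal standalone sketch of the mapping (the counts nc and nd are illustrative, not taken from a real node list):

#include <stdio.h>

/* Minimal sketch of the round-robin data-server assignment used
   above.  On a node with nc compute processes and nd data servers,
   local slots 0..nc-1 are compute processes and nc..nc+nd-1 are
   data servers; a rank with no data server gets -1.  The counts
   below are illustrative. */
int main(void) {
   int nc = 4, nd = 2, me;
   for(me=0; me<6; me++) {
      int myds = nd ? nc + (me % nd) : -1;
      printf("rank %i -> data-server slot %i\n", me, myds);
   }
   return 0;
}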
Exemplo n.º 26
0
/* ----------------------------------------------------- *\
   Init_soc_accept(np,me,recvsock,sockets)
   Accepts TCP check-in connections from lower-ranked
   DDI processes and records each socket by sender rank.
\* ----------------------------------------------------- */
   static void Init_soc_accept(int np, int me,int recvsock, int *sockets) {
      int on=1;
      fd_set readlist;

    # if defined SOC_BUFFER_SIZE
      int buffer = 0;
    # endif
     
      int isock = 0;
      int tsock = 0;
      int procid = 0;
      int nsocks = np; 
      int maxrank = np;

      if(me < np) nsocks = me; 
      if(USING_DATA_SERVERS()) maxrank *= 2;
      
      
   /* ---------------------------------- *\
      Start accepting socket connections 
   \* ---------------------------------- */
      while(isock < nsocks) {
         FD_ZERO(&readlist);
         FD_SET(recvsock,&readlist);
         
      /* --------------------------------------------------- *\
         Wait here until there is an incoming call on 'port'
      \* --------------------------------------------------- */
         select(recvsock+1,&readlist,NULL,NULL,NULL);

         
      /* --------------------------------------------------- *\
         The receptionist picks up the phone from the call
         on 'port' and routes the call to a new line 'tsock'
      \* --------------------------------------------------- */
         tsock = Accept(recvsock,NULL,NULL);

 
      /* ----------------------------------------------------- *\
         Remove the built-in TCP delay (disable Nagle's algorithm)
      \* ----------------------------------------------------- */
         setsockopt(tsock,IPPROTO_TCP,TCP_NODELAY,(void *) &on,sizeof(int));

         
      /* ---------------------- *\
         Set socket buffer size
      \* ---------------------- */
       # if defined SOC_BUFFER_SIZE
         buffer = SOC_BUFFER_SIZE;
         setsockopt(tsock,SOL_SOCKET,SO_RCVBUF,(void *) &buffer,sizeof(int));
         setsockopt(tsock,SOL_SOCKET,SO_SNDBUF,(void *) &buffer,sizeof(int));
       # endif

  
      /* ------------------------------------- *\
         The caller now introduces him/herself
      \* ------------------------------------- */
         if(Recv(tsock,&procid,sizeof(int),0) == 0) {
            fprintf(stdout,"%s: Detected a problem on another DDI process.\n",DDI_Id());
            Fatal_error(911);
         }

         if(procid < 0 || procid >= maxrank) {
            fprintf(stdout,"%s: Improper socket introduction ... abort ...\n",DDI_Id());
            Fatal_error(911);
         }

 
      /* ---------------------------------------------------------------------- *\
         Save caller information, i.e. DDI process 'procid' has now checked-in!
      \* ---------------------------------------------------------------------- */
         if(sockets[procid] == -1) {  /* Everything is OK */
            sockets[procid] = tsock;
            isock++;

            MAX_DEBUG((stdout,"%s: Accepted connection from DDI Process %i.\n",DDI_Id(),procid))

         } else {  /* Not everything is OK :( */

         /* ------------------------------------------------- *\
            Duplicate check-in: 'procid' already has a socket.
            (Closing reconstructed; the example is truncated.)
         \* ------------------------------------------------- */
            fprintf(stdout,"%s: DDI process %i attempted to check in twice.\n",
                    DDI_Id(),procid);
            Fatal_error(911);
         }
      }
   }
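
The connecting side of this check-in is symmetric: dial the peer's server port, disable Nagle, and send your own rank as the introduction. A hypothetical sketch of that leg (introduce_self is an illustrative helper, not part of the DDI API):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Hypothetical helper showing the caller's side of the handshake
   accepted above: the first message on a fresh link is the
   sender's DDI rank.  Not part of the DDI API. */
static int introduce_self(int sock, int myrank) {
   int on = 1;
   setsockopt(sock,IPPROTO_TCP,TCP_NODELAY,(void *)&on,sizeof(int));
   return (send(sock,&myrank,sizeof(int),0) == sizeof(int)) ? 0 : -1;
}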
Exemplo n.º 27
0
   static void Init_soc(int argc,char** argv) {
	   
   /* --------------- *\
      Local Variables
   \* --------------- */
      int i,j,iproc,nodearg,me;
      int kickport,nodeid,procid,nn,np,nsocks;
      int *ports,*sockets;
      char kickhost[256];
      int not_it;
      int ncpus,cpus,nics;
      char tmpstr[256];

      DDI_Comm *comm = (DDI_Comm *) &gv(ddi_base_comm);

   /* ---------------------- *\
      Assign signal handlers
   \* ---------------------- */
      signal(SIGTERM, Fatal_error);     /* handles a terminate signal */
      signal(SIGFPE,  Fatal_error);     /* handles a floating point error in the DDI process */
      signal(SIGABRT, Fatal_error);
      signal(SIGSEGV, Fatal_error);
      signal(SIGINT,  Fatal_error);
      signal(SIGILL,  Fatal_error);
      signal(SIGQUIT, Fatal_error);
      signal(SIGPIPE, Fatal_error);
      signal(SIGXCPU, Fatal_error);
      signal(SIGURG,  _sigurg_hndlr);   /* handles a OOB (out-of-band) message */
    # if defined DDI_MPI
      signal(SIGURG,  Fatal_error);
    # endif

   
   /* ---------------------------------- *\
      Find where the DDI arguments begin 
   \* ---------------------------------- */
    # if defined DDI_SOC && !defined DDI_MPI
      for(i=0,not_it=1; i<argc && not_it; i++) not_it = strcmp(argv[i],"-ddi");
      if(i==argc) {
         fprintf(stdout," DDI: Invalid command-line arguments!\n Are you using the ddikick program??\n");
         Fatal_error(911);
      }

      strcpy(kickhost,argv[i++]); 
      kickport = atoi(argv[i++]);               /*  kickoff port number */
      nodeid   = atoi(argv[i++]);               /*  rank of current node */
      procid   = atoi(argv[i++]);               /*  rank of current proc */
      nn       = atoi(argv[i++]);               /*  number of nodes */
      np       = atoi(argv[i++]);               /*  number of procs */


   /* --------------------------------------------------------------- *\
    * Initialize DDI_COMM_WORLD communicator.  This is not a complete
    * initialization; the remaining bits and pieces will be set up in
    * Comm_init().
   \* --------------------------------------------------------------- */
      comm->id = DDI_COMM_WORLD;
      comm->nn = nn;
      comm->my = nodeid;
      comm->np = np;
      comm->me = procid;


   /* -------------------------- *\
      Initialize Local Variables
   \* -------------------------- */
      me       = procid;
      nodearg  = i;
      nsocks   = np;
      if(USING_DATA_SERVERS()) nsocks *= 2;


      DEBUG_OUT(LVL1,(stdout," DDI Process %i: Execution begun on node %i.\n",procid,nodeid))
      DEBUG_OUT(LVL3,(stdout," The master kickoff program responds on %s:%i\n",kickhost,kickport))

 
   /* ------------------------------------------------ *\
      Parse command-line arguments for DDI information
   \* ------------------------------------------------ */
      if(Parse_node_args(argc,argv,nodearg,nn,gv(ddinodes)) < 0 ) {
         fprintf(stderr,"%s: Error while parsing node arguments.\n",DDI_Id());
         Fatal_error(911);
      }


   /* ------------------------------------------------ *\
    * Parse command-line for scratch/working directory
   \* ------------------------------------------------ */
      Init_scratch(argc,argv);


   /* ----------------------------------------------------------- *\
    * Using parsed information, initialize global data structures
   \* ----------------------------------------------------------- */
      for(i=0,iproc=0,ncpus=0; i<nn; i++) {
        gv(ddinodes)[i].nodemaster = iproc;
        cpus = gv(ddinodes)[i].cpus;
        nics = gv(ddinodes)[i].nics;
        gv(ddinodes)[i].myds = (me % cpus) + ncpus + np;
        DEBUG_OUT(LVL4,(stdout,"%s: ddinodes[%i] = { nodemaster=%i cpus=%i nics=%i }\n",DDI_Id(),i,iproc,cpus,nics))
        ncpus += cpus;
      # if !defined USE_SYSV
        if(cpus > 1) {
           if(comm->me == 0)
              fprintf(stdout,"%s %s\n%s %s\n",
                             "Error: detected :cpus in the ddi argument list, however,",
                             "DDI was not compiled using -DUSE_SYSV.  The program is",
                             "confused.  Either recompile DDI using shared-memory or",
                             "generate a proper argument list.");
           Fatal_error(911);
        }
      # endif
        for(j=0; j<cpus; j++,iproc++) {
           gv(ddiprocs)[iproc].node    = i;
           gv(ddiprocs)[iproc+np].node = i;
           strcpy(tmpstr,gv(ddinodes)[i].hostname);
           if(nics) {
             strcat(tmpstr,gv(ddinodes)[i].netext[j%nics]);
           }

           DEBUG_OUT(LVL4,(stdout,"%s: ddiprocs[%i] = { node=%i hostname=%s }\n",DDI_Id(),iproc,i,tmpstr))

           gv(ddiprocs)[iproc].hostname    = (char *) strdup(tmpstr);

         # if FULL_SMP
           if(nn == 1) continue;
         # endif

           gv(ddiprocs)[iproc+np].hostname = (char *) strdup(tmpstr);

        }
      }

    # else

   /* --------------------------------------------------------------- *\
      When mixing MPI and TCP/IP, MPI has been previously initialized
   \* --------------------------------------------------------------- */
      DDI_NProc(&np,&me);
      nsocks = np;
      if(USING_DATA_SERVERS()) nsocks += np;
    # endif
      

   /* ----------------------------------- *\
      Allocate memory for sockets & ports
   \* ----------------------------------- */
      sockets = (int *) Malloc(nsocks*sizeof(int));
      ports   = (int *) Malloc(nsocks*sizeof(int));

      gv(sockets) = sockets;
 
      for(i=0; i<nsocks; i++) {
        sockets[i] = -1;
        ports[i]   = -1;
      }
   
   
   /* ----------------------------------------------------------------- *\
      Create a server socket for accepting connections to DDI processes
   \* ----------------------------------------------------------------- */
      gv(serversock) = SOC_Create(SERVER_SOCKET,&gv(serverport),NULL);
      DEBUG_OUT(LVL3,(stdout,"%s: Receiving connections on port %d\n",DDI_Id(),gv(serverport)))


   /* ---------------------------------------------- *\
      Connect to the master kickoff program.
      Socket ownership is required to handle SIGURG.
      Use the premium network if available.
   \* ---------------------------------------------- */
    # if defined DDI_SOC && !defined DDI_MPI
      gv(kickoffsock) = SOC_Create(CLIENT_SOCKET,&kickport,kickhost);


   /* ---------------------------------- *\
      Report information back to ddikick
   \* ---------------------------------- */
      Send(gv(kickoffsock),&comm->me,sizeof(int),0);
      Send(gv(kickoffsock),&gv(serverport),sizeof(int),0);


   /* ------------------------------------------------- *\
      Wait until all DDI processes have checked in.
      The master ddikick.x will broadcast the ddi info.
   \* ------------------------------------------------- */
      Recv(gv(kickoffsock),ports,nsocks*sizeof(int),0);
      DEBUG_OUT(LVL3,(stdout,"%s: Received the list of server ports.\n",DDI_Id()))

    # else

      DEBUG_OUT(LVL3,(stdout,"%s: using MPI to gather TCP/IP port info.\n",DDI_Id()))
      MPI_Allgather(&gv(serverport),1,MPI_INT,ports,1,MPI_INT,comm->world_comm);

    # endif

 
   /* ------------------------------------------------------- *\
      Now starts the process of connecting the DDI processes:
      Each DDI compute process actively connects to all DDI
      processes whose ranks are greater than its own, but
      only after it accepts connections from lower ranks.
   \* ------------------------------------------------------- */
      Init_soc_accept(np,me,gv(serversock),sockets);
      Init_soc_create(np,me,nsocks,ports,sockets);
      
   }
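
The accept-then-connect rule described in the final comment wires every pair of ranks exactly once. A toy sketch, independent of any sockets, that prints the schedule for a small world (np is illustrative):

#include <stdio.h>

/* Toy illustration of the DDI connection ordering: rank r first
   accepts links from the r lower ranks, then connects to the
   higher ranks.  Every pair (a,b) with a<b is wired exactly once. */
int main(void) {
   int np = 4, r, peer;
   for(r=0; r<np; r++) {
      for(peer=0; peer<r; peer++)
         printf("rank %d accepts from rank %d\n", r, peer);
      for(peer=r+1; peer<np; peer++)
         printf("rank %d connects to rank %d\n", r, peer);
   }
   return 0;
}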
Exemplo n.º 28
0
/* -------------------------------------------------------------- *\
   DDI_GetAccP(handle,patch,buff)
   ============================
   [IN] handle - Handle of the distributed array to be accessed.
   [IN] patch  - structure containing ilo, ihi, jlo, jhi, etc.
   [IN] buff   - Data segment to be operated on.
\* -------------------------------------------------------------- */
   void DDI_GetAccP(int handle,DDI_Patch *patch,void *buff) {
   
   /* --------------- *\
      Local Variables
   \* --------------- */
      char ack=57;
      int i,np,me,nn,my,remote_id,nsubp;
      int ranks[MAX_NODES];
      DDI_Patch subp[MAX_NODES];
      char *working_buffer = (char *) buff;

    # if defined DDI_LAPI
      DDI_Patch *local_patch = NULL;
      lapi_cntr_t cntr[MAX_NODES];
    # endif
    
      STD_DEBUG((stdout,"%s: Entering DDI_GetAccP.\n",DDI_Id()))

   /* -------------------- *\
      Process OR Node Rank
   \* -------------------- */
      DDI_NProc(&np,&me);
      DDI_NNode(&nn,&my);


   /* ------------------------------------- *\
      Ensure the patch has the correct info
   \* ------------------------------------- */
      patch->oper   = DDI_GETACC;
      patch->handle = handle;


   /* ---------------------------------- *\
      Check calling arguments for errors
   \* ---------------------------------- */
    # if defined DDI_CHECK_ARGS
      if(handle < 0 || handle >= gv(ndda)) {
         fprintf(stdout,"%s: Invalid handle [%i] in DDI_GetAcc.\n",DDI_Id(),handle);
         Fatal_error(911);
      }
      
      if(patch->ilo > patch->ihi || patch->ilo < 0 || patch->ihi >= gv(nrow)[handle]) {
         fprintf(stdout,"%s: Invalid row dimensions during DDI_GetAcc => ilo=%i ihi=%i.\n",DDI_Id(),patch->ilo,patch->ihi);
         Fatal_error(911);
      }
      
      if(patch->jlo > patch->jhi || patch->jlo < 0 || patch->jhi >= gv(ncol)[handle]) {
         fprintf(stdout,"%s: Invalid colum dimensions during DDI_GetAcc => jlo=%i jhi=%i.\n",DDI_Id(),patch->jlo,patch->jhi);
         Fatal_error(911);
      }
    # endif


   /* ------------------------------ *\
      Log some simple profiling info
   \* ------------------------------ */
    # if defined DDI_COUNTERS
      gv(acc_profile).ncalls++;
      gv(acc_profile).nbytes += DDI_Patch_sizeof(patch);
    # endif


   /* ------------------------------------------------------- *\
      Determine where the pieces of the requested patch exist
   \* ------------------------------------------------------- */
      DDI_Subpatch(handle,patch,&nsubp,ranks,subp);
      MAX_DEBUG((stdout,"%s: %i subpatches.\n",DDI_Id(),nsubp))

      
   /* ------------------------------------------------------------------- *\
      Send data requests for all non-local pieces of the requested patch.
      Operate immediately to GetAcc a local portion of the patch.
   \* ------------------------------------------------------------------- */
      for(i=0; i<nsubp; i++) {
         ULTRA_DEBUG((stdout,"%s: GetAccumulating subpatch %i.\n",DDI_Id(),i))

      /* ------------------------------------------------------------- *\
         Using SysV, take advantage of shared-memory for a local patch
      \* ------------------------------------------------------------- */
       # if defined USE_SYSV

      /* ------------------------------------------------ *\
         Determine if the ith patch is local to 'my' node
      \* ------------------------------------------------ */
         if(ranks[i] == my) {
            MAX_DEBUG((stdout,"%s: Subpatch %i is local.\n",DDI_Id(),i))

         /* ---------------------------------------------------- *\
            Using LAPI, perform the local Getacc after all the data
            requests have been sent ==> maximize concurrency.
         \* ---------------------------------------------------- */
          # if defined DDI_LAPI
            local_patch = &subp[i];
            local_patch->cp_buffer_addr = working_buffer;
          # else
         /* --------------------------------------------- *\
            Otherwise, perform the local Getacc immediately.
         \* --------------------------------------------- */
            DDI_GetAcc_local(&subp[i],working_buffer);
          # endif

         /* ------------------------------------------------------- *\
            Move the working buffer to the next patch and continue.
         \* ------------------------------------------------------- */
            working_buffer += subp[i].size;
            continue;
         }
       # endif


      /* --------------------------------- *\
         If the current patch is NOT local 
      \* --------------------------------- */
         remote_id = ranks[i];


      /* ----------------------------------------------- *\
         Using LAPI, then include some extra information
      \* ----------------------------------------------- */
       # if defined DDI_LAPI
         subp[i].cp_lapi_id     = gv(lapi_map)[me];
         subp[i].cp_lapi_cntr   = (void *) &cntr[i];
         subp[i].cp_buffer_addr = (void *) working_buffer;
         LAPI_Setcntr(gv(lapi_hnd),&cntr[i],0);

         ULTRA_DEBUG((stdout,"%s: cp_lapi_id=%i.\n",DDI_Id(),gv(lapi_map)[me]))
         ULTRA_DEBUG((stdout,"%s: cp_lapi_cntr=%x.\n",DDI_Id(),&cntr[i]))
         ULTRA_DEBUG((stdout,"%s: cp_buffer_addr=%x.\n",DDI_Id(),working_buffer))
       # endif
      
      /* -------------------------------- *\
         Send data request for subpatch i
      \* -------------------------------- */
         MAX_DEBUG((stdout,"%s: Sending data request to node %i.\n",DDI_Id(),remote_id))
         DDI_Send_request(&subp[i],&remote_id,NULL);
         MAX_DEBUG((stdout,"%s: data request sent to global process %i.\n",DDI_Id(),remote_id))


      /* ------------------------------------------------------------ *\
         Receive an acknowledgement that the data server has raised
         a fence that will protect the distributed array from get or
         put access until all accumulates have finished.  This block-
         ing receive ensures that the current process executing this
         accumulate can *NOT* finish, until the fence has been raised 
      \* ------------------------------------------------------------ */
       # if !defined DDI_LAPI
       # if defined USE_SYSV
         MAX_DEBUG((stdout,"%s: Receiving remote fence ACK.\n",DDI_Id()))
         DDI_Recv(&ack,1,remote_id);
       # endif


      /* ------------------------------------------------- *\
         Send the local buffer to remote_id, then recv the
         original subpatch contents back (get-accumulate)
      \* ------------------------------------------------- */
         MAX_DEBUG((stdout,"%s: Sending subpatch %i to %i.\n",DDI_Id(),i,remote_id))
         DDI_Send(working_buffer,subp[i].size,remote_id);
         DDI_Recv(working_buffer,subp[i].size,remote_id);
       # endif

      
      /* ------------ *\
         Shift buffer 
      \* ------------ */
         working_buffer += subp[i].size;
      }

   /* ------------------------------------------------------------ *\
      Using LAPI, perform the local GetAccumulate (if any) while
      the remote processes retrieve the data they need for their
      GetAccumulates.  Then wait for all the data to be copied
      out of the buffer before returning.
   \* ------------------------------------------------------------ */
    # if defined DDI_LAPI

   /* ------------------------------------ *\
      GetAccumulate the local patch (if one exists)
   \* ------------------------------------ */
      if(local_patch) DDI_GetAcc_local(local_patch,local_patch->cp_buffer_addr);

   /* ---------------------------------------------------------- *\
      Wait for all remote LAPI_Gets to finish copying local data
   \* ---------------------------------------------------------- */
      for(i=0; i<nsubp; i++) {
         if(subp[i].cp_lapi_cntr) {
            ULTRA_DEBUG((stdout,"%s: Wait for subpatch %i to be copied.\n",DDI_Id(),i))
            LAPI_Waitcntr(gv(lapi_hnd),&cntr[i],3,NULL);
            ULTRA_DEBUG((stdout,"%s: Subpatch %i copy completed.\n",DDI_Id(),i))
         }
      }
    # endif
   }
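
At the lowest level the operation is an element-wise fetch-and-add: the array gains the caller's buffer while the buffer receives the array's previous contents. A minimal serial sketch of that semantic (assuming the usual get-accumulate meaning; the real DDI_GetAcc_local also handles patch indexing and node-level locking):

#include <stddef.h>

/* Serial sketch of get-accumulate semantics, assuming the usual
   fetch-and-add meaning: dda accumulates buf, and buf returns
   dda's prior values.  The real DDI_GetAcc_local additionally
   maps (ilo..ihi,jlo..jhi) patch coordinates into the shared
   segment and serializes access with node-level locks. */
static void getacc_sketch(double *dda, double *buf, size_t n) {
   size_t i;
   for(i=0; i<n; i++) {
      double old = dda[i];
      dda[i] += buf[i];
      buf[i]  = old;
   }
}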
Exemplo n.º 29
0
   void Kickoff_PBS(const Node_info *ddinodes,const Cmdline_info *info) {
      char ddiinfo[] = "-ddi";
      char procid[8];
      char portid[8];
      char nodeid[8];
      char snodes[8];
      char sprocs[8];
      char **rargs;
      char **argv = info->argv;
      int i,j,r,iarg,nargs = info->ddiarg + info->nnodes + 8;
      int inode,ncpus,np = info->nprocs;
      int ntests;

      if(info->nnodes == 1) return;

      int tm_errno;
      tm_task_id *tid;
      tm_event_t *spawn;
      tm_event_t polled;
      struct tm_roots roots;
      tm_node_id *nodelist;


   /* ---------------------------------- *\
      Initialize PBS Task Management API
   \* ---------------------------------- */
      if(tm_init(0, &roots) != TM_SUCCESS) {
         fprintf(stderr, " ddikick.x: tm_init failed\n");
         Fatal_error(911);
      }

      if(tm_nodeinfo(&nodelist, &np) != TM_SUCCESS) {
         fprintf(stderr, " ddikick.x: tm_nodeinfo failed.\n");
         Fatal_error(911);
      }

      tid   = (tm_task_id *) Malloc(2*np*sizeof(tm_task_id)); 
      spawn = (tm_event_t *) Malloc(2*np*sizeof(tm_event_t));

      for(i=0; i<2*np; i++) {
         *(tid + i)   = TM_NULL_TASK;
         *(spawn + i) = TM_NULL_EVENT;
      }


   /* ----------------------------------------- *\
      Initialize arguments to kickoff DDI tasks
   \* ----------------------------------------- */
      rargs = (char **) Malloc(nargs*sizeof(char*));

      sprintf(portid, "%d", info->kickoffport);
      sprintf(snodes, "%d", info->nnodes);
      sprintf(sprocs, "%d", info->nprocs);     

      for(i=1,r=0; i<info->ddiarg-1; i++) rargs[r++] = argv[i];

      rargs[r++] = ddiinfo;
      rargs[r++] = info->kickoffhost;    /*   kickoff host name     */
      rargs[r++] = portid;               /*   kickoff port number   */
      rargs[r++] = nodeid;               /*   rank of this node     */
      rargs[r++] = procid;               /*   rank of this process  */
      rargs[r++] = snodes;               /*   number of nodes       */
      rargs[r++] = sprocs;               /*   number of processors  */
  
      for(i=0,iarg=info->nodearg; i<info->nnodes; i++,iarg++) {
         rargs[r++] = argv[iarg];
      }   
          
      rargs[r] = NULL;


   /* ------------------------ *\
      Spawn DDI tasks to nodes
   \* ------------------------ */
      ncpus=ddinodes[0].cpus+ddinodes[1].cpus;
      for(i=ddinodes[0].cpus,inode=1; i<np; i++) {
         
         if(i == ncpus) ncpus += ddinodes[++inode].cpus;
         
         sprintf(nodeid,"%d",inode);
         sprintf(procid,"%d",i);

       # if DDI_DEBUG
         DEBUG_START(DEBUG_MAX)
         fprintf(stdout,"DDI Process %i PBS tm_spawn arguments: ",i);
         for(iarg=0; iarg<r; iarg++) fprintf(stdout,"%s ",rargs[iarg]);
         fprintf(stdout,"\n");
         DEBUG_END()
       # endif

      /* ------------------------- *\
         Spawn DDI Compute Process
      \* ------------------------- */
         if(tm_spawn(r,rargs,NULL,*(nodelist+i),(tid+i),spawn+i) != TM_SUCCESS) {
            fprintf(stderr," ddikick.x: tm_spawn failed.\n");
            Fatal_error(911);
         }


      /* ----------------------------------- *\
         No data server on single node runs
         (unreachable here: single-node runs
         returned at the top of this routine)
      \* ----------------------------------- */
         if(info->nnodes == 1) continue;


         j = i+np;
         sprintf(procid,"%d",j);

       # if DDI_DEBUG
         DEBUG_START(DEBUG_MAX)
         fprintf(stdout,"DDI Process %i PBS tm_spawn arguments: ",j);
         for(iarg=0; iarg<r; iarg++) fprintf(stdout,"%s ",rargs[iarg]);
         fprintf(stdout,"\n");
         DEBUG_END()
       # endif
         
      /* --------------------- *\
         Spawn DDI Data Server
      \* --------------------- */
         if(tm_spawn(r,rargs,NULL,*(nodelist+i),(tid+j),spawn+j) != TM_SUCCESS) {
            fprintf(stderr," ddikick.x: tm_spawn failed.\n");
            Fatal_error(911);
      }  }


   /* -------------------------------------------------------- *\
      Poll PBS to ensure each DDI process started successfully
   \* -------------------------------------------------------- */
      ntests = np-ddinodes[0].cpus;
      if(USING_DATA_SERVERS())  ntests *= 2;

      for(i=ntests; i--; ) {
         if(tm_poll(TM_NULL_EVENT,&polled,1,&tm_errno) != TM_SUCCESS) {
            fprintf(stderr," ddikick.x: tm_poll failed.\n");
            Fatal_error(911);
         }
         
         for(j=0; j<np; j++) {
            if(polled == *(spawn+j)) {
               if(tm_errno) {
                  fprintf(stderr," ddikick.x: error spawning DDI task %i.\n",j);
                  Fatal_error(911);
               } else {
                # if DDI_DEBUG
                  DEBUG_START(DEBUG_MAX)
                  fprintf(stdout," ddikick.x: DDI task %i started.\n",j);
                  DEBUG_END()
                # endif
            }  }

            if(info->nnodes == 1) continue;

            if(polled == *(spawn+j+np)) {
               if(tm_errno) {
                  fprintf(stderr," ddikick.x: error spawning DDI task %i.\n",j+np);
                  Fatal_error(911);
               } else {
                # if DDI_DEBUG
                  DEBUG_START(DEBUG_MAX)
                  fprintf(stdout," ddikick.x: DDI task %i started.\n",j+np);
                  DEBUG_END()
                # endif
      }  }  }  }

      
   /* -------------------------------------- *\
      Close the link to the PBS Task Manager
   \* -------------------------------------- */
      tm_finalize();


   /* ---------------- *\
      Free used memory
   \* ---------------- */
      free(tid);
      free(spawn);
      free(rargs);      
   }
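
The spawn loop's rank-to-node bookkeeping is a running sum over per-node cpu counts: each time the loop index reaches the running total, the loop advances to the next node. A standalone sketch with illustrative counts:

#include <stdio.h>

/* Sketch of the rank->node mapping used by the spawn loop above:
   advance to the next node whenever the running cpu total is hit.
   The cpu counts here are illustrative; ranks on node 0 are
   started by ddikick.x itself and skipped, as in the loop above. */
int main(void) {
   int cpus_by_node[] = {2,4,2};
   int np = 8, i, inode = 1;
   int ncpus = cpus_by_node[0] + cpus_by_node[1];
   for(i=cpus_by_node[0]; i<np; i++) {
      if(i == ncpus) ncpus += cpus_by_node[++inode];
      printf("compute process %d runs on node %d\n", i, inode);
   }
   return 0;
}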
Exemplo n.º 30
0
   void DDI_GetAcc_lapi_server(DDI_Patch *patch, void *buffer) {

   /* --------------- *\
      Local Variables 
   \* --------------- */
      lapi_handle_t hndl;
      uint tgt;
      ulong len;
      void *tgt_addr,*org_addr;
      lapi_cntr_t *tgt_cntr,*org_cntr,*cmpl_cntr;
      lapi_cntr_t local_cntr;

      STD_DEBUG((stdout,"%s: Entering DDI_GetAcc_lapi_server.\n",DDI_Id()))

   /* ----------------------------------------- *\
      Set up the calling arguments for LAPI_Get 
   \* ----------------------------------------- */
      hndl      = gv(lapi_hnd);                        /* LAPI Handle */
      tgt       = patch->cp_lapi_id;                   /* Target for the LAPI_Get */
      len       = (ulong) patch->size;                 /* Amount of data to get */
      tgt_addr  = patch->cp_buffer_addr;               /* Addr at target to get */
      org_addr  = buffer;                              /* Local Addr for data that is got */
      tgt_cntr  = (lapi_cntr_t *) patch->cp_lapi_cntr; /* Target counter */ 
      org_cntr  = &local_cntr;                         /* Local counter -> incremented once
                                                          the LAPI_Get is completed */
      cmpl_cntr = NULL;
                                                  
      ULTRA_DEBUG((stdout,"%s: DDI_Patch -> ilo=%i ihi=%i jlo=%i jhi=%i size=%lu.\n",
                   DDI_Id(),patch->ilo,patch->ihi,patch->jlo,patch->jhi,patch->size))
      ULTRA_DEBUG((stdout,"%s: org buffer_addr=%x cntr_addr=%x.\n",DDI_Id(),org_addr,org_cntr))
      ULTRA_DEBUG((stdout,"%s: tgt id=%i buffer_addr=%x cntr_addr=%x.\n",DDI_Id(),tgt,tgt_addr,tgt_cntr))


   /* -------------------------------------------------------------------------- *\
      We are interested in when the LAPI_Get is finished, zero the local counter
   \* -------------------------------------------------------------------------- */
      if(LAPI_Setcntr(hndl,org_cntr,0) != LAPI_SUCCESS) {
         fprintf(stdout,"%s: LAPI_Setcntr failed in DDI_GetAcc_lapi_server.\n",DDI_Id());
         Fatal_error(911);
      }
      ULTRA_DEBUG((stdout,"%s: Initializing local LAPI counter.\n",DDI_Id()))


   /* --------------------------------------------------- *\
      Execute the LAPI_Get.  This is a non-blocking call.
   \* --------------------------------------------------- */
      if(LAPI_Get(hndl,tgt,len,tgt_addr,org_addr,tgt_cntr,org_cntr) != LAPI_SUCCESS) {
         fprintf(stdout,"%s: LAPI_Get failed in DDI_GetAcc_lapi_server.\n",DDI_Id());
         Fatal_error(911);
      }
      MAX_DEBUG((stdout,"%s: Executing LAPI_Get from %i.\n",DDI_Id(),tgt))


   /* ------------------------------------------------------------------------ *\
      Wait here until the local counter is incremented ==> LAPI_Get completed.
   \* ------------------------------------------------------------------------ */
      if(LAPI_Waitcntr(hndl,org_cntr,1,NULL) != LAPI_SUCCESS) {
         fprintf(stdout,"%s: LAPI_Waitcntr failed in DDI_GetAcc_lapi_server.\n",DDI_Id());
         Fatal_error(911);
      }
      MAX_DEBUG((stdout,"%s: LAPI_Get from %i completed.\n",DDI_Id(),tgt))
      

   /* -------------------------------------------------------------- *\
      Place the data (now local) into the shared-memory of the node.
   \* -------------------------------------------------------------- */
      MAX_DEBUG((stdout,"%s: LAPI handler calling DDI_GetAcc_local.\n",DDI_Id()))
      DDI_GetAcc_local(patch,buffer);
      MAX_DEBUG((stdout,"%s: LAPI handler completed DDI_GetAcc_local.\n",DDI_Id()))
      STD_DEBUG((stdout,"%s: Exiting DDI_GetAcc_lapi_server.\n",DDI_Id()))


   /* --------------------------------------------------- *\
      Execute the LAPI_Put.  This is a non-blocking call.
   \* --------------------------------------------------- */
      if(LAPI_Put(hndl,tgt,len,tgt_addr,org_addr,tgt_cntr,org_cntr,cmpl_cntr) != LAPI_SUCCESS) {
         fprintf(stdout,"%s: LAPI_Get failed in DDI_GetAcc_lapi_server.\n",DDI_Id());
         Fatal_error(911);
      }
      MAX_DEBUG((stdout,"%s: Executing LAPI_Put from %i.\n",DDI_Id(),tgt))


   /* ------------------------------------------------------------------------- *\
      Wait here until the local counter is incremented ==> LAPI_Put has copied.
   \* ------------------------------------------------------------------------- */
      if(LAPI_Waitcntr(hndl,org_cntr,1,NULL) != LAPI_SUCCESS) {
         fprintf(stdout,"%s: LAPI_Waitcntr failed in DDI_GetAcc_lapi_server.\n",DDI_Id());
         Fatal_error(911);
      }
      MAX_DEBUG((stdout,"%s: LAPI_Put copied data enroute to %i.\n",DDI_Id(),tgt))

   }
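
The Setcntr/Get/Waitcntr sequence above is the general LAPI idiom for turning a non-blocking transfer into a blocking one. Distilled into a helper, using only the calls and argument order already shown (error handling omitted; assumes the AIX lapi.h header):

#include <sys/types.h>
#include <lapi.h>

/* Blocking-get idiom distilled from the routine above: zero an
   origin counter, issue the non-blocking LAPI_Get, then wait for
   the counter to reach 1.  Error handling omitted for brevity. */
static void blocking_lapi_get(lapi_handle_t hndl, uint tgt, ulong len,
                              void *tgt_addr, void *org_addr,
                              lapi_cntr_t *tgt_cntr) {
   lapi_cntr_t org_cntr;
   LAPI_Setcntr(hndl,&org_cntr,0);
   LAPI_Get(hndl,tgt,len,tgt_addr,org_addr,tgt_cntr,&org_cntr);
   LAPI_Waitcntr(hndl,&org_cntr,1,NULL);
}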