Example #1
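/* End-of-collective hook for critical-path tracking. Unless the previous
   event was already a barrier, the ranks agree on a new logical timestamp
   (MPI_MAX Allreduce), and an MPI_MINLOC Allreduce on the measured wait
   time selects the single rank that records an edge into the collective's
   node via cp_setedge/cp_setnode. */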
void COMM_COLL_END(MPI_Comm comm, int root, int type, void **ptr)
{
  int ts, edge;
  struct minnode
  {
    double d;
    int n;
  } loc, glob;
  mytime_t timing_diff;

  timing_diff =
    mytime_convertToSec(mytime_timeDiff(timing_start, mytime_getTimeStamp()));

  critpath = 1;

  if (after_barrier)
    {
      ts = local_timestamp + 1;
      cp_setnode(-1, ts, -1, critpath);
      cp_setedge(-1, local_timestamp, -1, ts, 1);
    }
  else
    {
      /* find timestamps */

      PMPI_Allreduce(&local_timestamp, &ts, 1, MPI_INT, MPI_MAX, comm);
      ts++;

      /* find the minimal wait node */

      loc.d = timing_diff;
      loc.n = critpath_myid;
      PMPI_Allreduce(&loc, &glob, 1, MPI_DOUBLE_INT, MPI_MINLOC, comm);

      /* set barrier node */

      cp_setnode(-1, ts, -1, critpath);
      if (glob.n == critpath_myid)
        edge = 1;
      else
        edge = 0;

      if (edge)
        cp_setedge(critpath_myid, local_timestamp, -1, ts, edge);
    }

  /* set new timestamp and critpath */

  local_timestamp = ts;
  after_barrier = 1;
  barrier_number++;
}
Example #2
File: wrapper.c  Project: rscohn2/MLSL
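/* MLSL wrapper for MPI_Allreduce: if the communicator has a command queue,
   the call becomes a non-blocking cqueue_iallreduce completed with MPI_Wait;
   in implicit multi-endpoint mode, messages at or above the threshold are
   split across endpoints and issued as parallel iallreduce chunks; everything
   else falls through to PMPI_Allreduce. */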
int MPI_Allreduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
    int ret;
    cqueue_t* mycqueue = handle_get_cqueue(comm);

    if (mycqueue != NULL)
    {
        MPI_Request tmprequest;
        cqueue_iallreduce(mycqueue, sendbuf, recvbuf, count, datatype, op, comm, &tmprequest);
        return MPI_Wait(&tmprequest, MPI_STATUS_IGNORE);
    }
    else
    {
        if (std_mpi_mode == STD_MPI_MODE_IMPLICIT && max_ep > 0)
        {
            int num_ep = 1;
            if (count >= std_mpi_mode_implicit_allreduce_threshold)
              num_ep = max_ep;

            for (int epid = 0; epid < num_ep; epid++)
            {
                long start, chunk;
                GET_MESSAGE_PAYLOAD(epid, num_ep, count, chunk, start);
                block_coll_request[epid] = MPI_REQUEST_NULL;
                ret = cqueue_iallreduce(client_get_cqueue(epid), (float*)sendbuf + start, (float*)recvbuf + start,
                                        chunk, datatype, op, comm, &block_coll_request[epid]);
            }
            return MPI_Waitall(num_ep, block_coll_request, MPI_STATUSES_IGNORE);
        }
        return PMPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
    }

    return ret;
}
Example #3
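/* Collective-call checker: counts (via MPI_SUM) how many ranks passed
   MPI_IN_PLACE for this buffer; usage is consistent only if the count is
   zero or equals the communicator size, otherwise the collchk error handler
   is invoked. Compiles to a no-op when MPI_IN_PLACE is unavailable. */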
int CollChk_check_buff(MPI_Comm comm, void *buff, char* call)
{
#if defined( HAVE_MPI_IN_PLACE )
    int   num_buffs_in_place;
    int   is_consistent;
    int   rank, size;
    char  err_str[COLLCHK_STD_STRLEN];

    /* get the communicator size; the rank is queried only on error */
    MPI_Comm_size(comm, &size);

    num_buffs_in_place = (buff == MPI_IN_PLACE);
    PMPI_Allreduce( MPI_IN_PLACE, &num_buffs_in_place, 1, MPI_INT,
                    MPI_SUM, comm );
    is_consistent = (num_buffs_in_place == 0 || num_buffs_in_place == size);
    if ( !is_consistent ) {
        MPI_Comm_rank(comm, &rank);
        sprintf(err_str,"Inconsistent use of MPI_IN_PLACE is detected "
                        "at rank %d.\n", rank);
    }
    else
        sprintf(err_str, COLLCHK_NO_ERROR_STR);

    if (!is_consistent) {
        return CollChk_err_han(err_str, COLLCHK_ERR_INPLACE, call, comm);
    }
#endif

    return MPI_SUCCESS;
}
Example #4
File: mpiPi.c  Project: saxena/mpip
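/* Merges per-task point-to-point statistics for the detailed mpiP report:
   an MPI_SUM Allreduce establishes whether any records exist at all, a
   Gather collects the per-rank record counts at the collector, and the
   detail data are then exchanged with mpiPi_send/recv_pt2pt_stats. */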
static int
mpiPi_mergept2ptHashStats ()
{
  int ac;
  pt2pt_stats_t **av;
  int totalCount = 0;

  if (mpiPi.do_pt2pt_detail_report)
    {
      /* gather local task data */
      h_gather_data (mpiPi.task_pt2pt_stats, &ac, (void ***) &av);

      /* Make sure we have data to collect, otherwise skip */
      PMPI_Allreduce (&ac, &totalCount, 1, MPI_INT, MPI_SUM, mpiPi.comm);

      mpiPi_msg_debug("(%d) Merging pt2pt stats: totalCount: %d\n",
		      mpiPi.rank, totalCount);

      if (totalCount < 1)
	{
	  if (mpiPi.rank == mpiPi.collectorRank)
	    {
	      mpiPi_msg_warn
		("Collector found no records to merge. Omitting report.\n");
	    }
	  return 1;
	}

      /* Gather the ac for all ranks at the root */
      if (mpiPi.rank == mpiPi.collectorRank)
	{
	  mpiPi.accumulatedPt2ptCounts = (int*)calloc(mpiPi.size, sizeof(int));
	  assert(mpiPi.accumulatedPt2ptCounts);
	}

      PMPI_Gather(&ac, 1, MPI_INT, mpiPi.accumulatedPt2ptCounts,
		  1, MPI_INT, mpiPi.collectorRank, mpiPi.comm);

      /* gather global data at collector */
      if (mpiPi.rank == mpiPi.collectorRank)
	{
	  mpiPi_recv_pt2pt_stats(ac,av);
	}
      else
	{
	  /* Send all pt2pt data to collector */
	  mpiPi_send_pt2pt_stats(ac,av);
	}
    }

  return 1;

}
Example #5
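/* Open MPI Fortran binding for MPI_Allreduce: converts the Fortran handles
   to their C counterparts, maps the MPI_IN_PLACE / MPI_BOTTOM sentinels,
   calls PMPI_Allreduce, and converts the return code back to a Fortran
   integer. */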
void ompi_allreduce_f(char *sendbuf, char *recvbuf, MPI_Fint *count,
		     MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm,
		     MPI_Fint *ierr)
{
    int ierr_c;
    MPI_Comm c_comm;
    MPI_Datatype c_type;
    MPI_Op c_op;

    c_comm = PMPI_Comm_f2c(*comm);
    c_type = PMPI_Type_f2c(*datatype);
    c_op = PMPI_Op_f2c(*op);

    sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf);
    sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf);
    recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf);

    ierr_c = PMPI_Allreduce(sendbuf, recvbuf,
                            OMPI_FINT_2_INT(*count),
                            c_type, c_op, c_comm);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c);
}
Example #6
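/* Writes the IPM profiling log. Depending on the compile-time aggregation
   method, the per-task records go to one file per rank, to a single file
   protected by file locks, to per-node /tmp files concatenated by the node
   root, or (the default) to a single file written either with MPI-IO or by
   funnelling every task's record through rank 0. */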
void ipm_log(void) { /* called by all tasks (or as many as possible) */
    int i,ii,rv,icall,ibytes,irank,ireg,kreg;
    int ipm_log_fd=-1, fan_out=0;
    int log_rank=-1, search_offset=0, token=1;
    FILE *ipm_mpi_log_fh;
    MPI_File ipm_mpiio_log_fh;
    DIR  *ipm_mpi_log_dp;
    struct dirent *de;
    struct stat file_stat;
    IPM_KEY_TYPE key,ikey;
    char *cp, txt[MAXSIZE_TXTLINE];
    char tmp_fname[MAXSIZE_FILENAME];
    char tmp_pref[MAXSIZE_FILENAME];
    char tmp_cmd[MAXSIZE_FILENAME];
    double b_flops;
    double stamp1, stamp2, stamp3, stamp4;
    MPI_Status s[4];
    MPI_Info outinfo;


    if(task.flags & IPM_WROTELOG) return;
    memset((void *)txt,0,(size_t)(MAXSIZE_TXTLINE*sizeof(char)));
    task.flags |= IPM_WROTELOG;

    /* only one chance, even if we fail at this point we should not return  */

    if(task.flags & DEBUG) {
        printf("IPM: %d log enter job.cookie=%s username=%s \n",
               task.mpi_rank,
               job.cookie,
               job.username);
        fflush(stdout);
    }

    /*
    ** bail
    */


    if(strcmp(job.log_dir, "/dev/null") == 0 ) {
        if(task.flags & DEBUG) {
            printf("IPM: %d log exit due to LOGDIR=/dev/null", task.mpi_rank);
        }
        return;
    }

    if(stat(job.log_dir,&file_stat)) {
        if(!task.mpi_rank) {
            printf("IPM: %d log IPMLOG_DIR %s not available using $CWD \n",
                   task.mpi_rank,  job.log_dir);
        }
        sprintf(job.log_dir, "./");
    }


    /*
    ** Aggregation method #1 : Multiple Files - No Aggregation
    IPM_LOG_USEMULTI
    ** Aggregation method #2 : Single File    - Locks
    IPM_LOG_USELOCKS
    ** Aggregation method #3 : Single File    - SMP & /tmp
    IPM_LOG_USETMPFS
    ** Aggregation method #4 : Single File    - MPI  - default
    IPM_LOG_USEMPI
    */

#ifndef IPM_LOG_USEMULTI
#ifndef IPM_LOG_USELOCKS
#ifndef IPM_LOG_USETMPFS
#ifndef IPM_LOG_USEMPI
#endif
#endif
#endif
#endif


#ifdef IPM_LOG_USEMULTI
    sprintf(job.log_fname,"%s/%s.%s.%d",
            job.log_dir,
            job.username,
            job.cookie,
            task.mpi_rank);
#else
    if (!strcmp(job.log_fname,"unset")) {
        sprintf(job.log_fname,"%s/%s.%s.%d",
                job.log_dir,
                job.username,
                job.cookie,0);
    }
    else
    {
        sprintf(tmp_fname,"%s/%s",job.log_dir,job.log_fname);
        sprintf(job.log_fname,"%s",tmp_fname);
    }
#endif

    if(task.flags & DEBUG) {
        printf("IPM: %d log IPMLOG_DIR=%s FNAME=%s \n",
               task.mpi_rank,
               job.log_dir,
               job.log_fname);
    }
    /*
    ** Aggregation method #1 : Multiple Files - No Aggregation  {
    */

#ifdef IPM_LOG_USEMULTI
    /* simplest case no locking just write each file. Parallel FS may
       have metadata storm for N file creates */

    ipm_mpi_log_fh = fopen(job.log_fname,"w");
    if(ipm_mpi_log_fh == NULL) {
        printf("IPM: %d log fopen failed fname=%s \n",
               task.mpi_rank,
               job.log_fname);
        fflush(stdout);
    }
    rv = fprintf(ipm_mpi_log_fh,
                 "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n<ipm>\n");
    ipm_log_write_task(&job, &task, txt, ipm_mpi_log_fh);
    rv = fprintf(ipm_mpi_log_fh,
                 "</ipm>\n");
    fclose(ipm_mpi_log_fh);
    chmod(job.log_fname, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);

    return;
#endif
    /* } */

    /*
    ** Aggregation methods #2 : Single File    - Locks {
    */
#ifdef IPM_LOG_USELOCKS
    signal(SIGALRM, ipm_alarm_log_block);
    alarm(30);
    IPM_TIME_GTOD(stamp1);
    if(task.flags & DEBUG) {
        printf("IPM: %d log block_lock fname=%s fd=%d stamp=%f \n",
               task.mpi_rank,
               job.log_fname,
               ipm_log_fd,
               stamp1
              );
        fflush(stdout);
    }

    IPM_FILE_LOCK(job.log_fname,ipm_log_fd);

    IPM_TIME_GTOD(stamp2);
    if(task.flags & DEBUG) {
        printf("IPM: %d log block_lock fname=%s fd=%d stamp=%12.6f delta=%.3e \n",
               task.mpi_rank,
               job.log_fname,
               ipm_log_fd,
               stamp2, stamp2-stamp1
              );
        fflush(stdout);
    }


    alarm(0);
    signal(SIGALRM, SIG_DFL);

    ipm_mpi_log_fh = fdopen(ipm_log_fd,"w+");

    if(!ipm_mpi_log_fh || !ipm_log_fd) {
        /* fail silently */
        return;
    }

    /* got log fh */
    fseek(ipm_mpi_log_fh,0,SEEK_END);
    ipm_log_write_task(&job, &task, txt, ipm_mpi_log_fh);

    IPM_TIME_GTOD(stamp3);
    if(task.flags & DEBUG) {
        printf("IPM: %d log write fname=%s fd=%d stamp=%12.6f delta=%.3e \n",
               task.mpi_rank,
               job.log_fname,
               ipm_log_fd,
               stamp3, stamp3-stamp2
              );
        fflush(stdout);
    }


    fflush(ipm_mpi_log_fh);
    IPM_FILE_UNLOCK(job.log_fname,ipm_log_fd);

    IPM_TIME_GTOD(stamp4);
    if(task.flags & DEBUG) {
        printf("IPM: %d log unlock fname=%s fd=%d stamp=%12.6f delta=%.3e \n",
               task.mpi_rank,
               job.log_fname,
               ipm_log_fd,
               stamp4, stamp4-stamp3
              );
        fflush(stdout);
    }

#endif
    /* } */

    /*
    ** Aggregation method #3 : Single File    - SMP & /tmp {
    */
#ifdef IPM_LOG_USETMPFS

    if(task.flags & IPM_MPI_FINALIZING) {
        if(task.mpi_size == 1) { /* special easy case, now unneeded */
        }

        sprintf(tmp_fname,"/tmp/%s.%s.%d",
                job.username,
                job.cookie,
                task.mpi_rank);
        ipm_mpi_log_fh = fopen(tmp_fname,"w");
        if(ipm_mpi_log_fh == NULL) {
            printf("IPM: %d log fopen failed fname=%s \n",
                   task.mpi_rank,
                   tmp_fname);
            fflush(stdout);
        }
        chmod(tmp_fname, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
        ipm_log_write_task(&job, &task, txt, ipm_mpi_log_fh);
        fclose(ipm_mpi_log_fh);


        /* host local ring barrier so that /tmp is all good */

        if(task.intra_size > 1) {
            if(task.intra_root == task.mpi_rank) {
                PMPI_Send(&i,1,MPI_INT,task.intra_right,0,MPI_COMM_WORLD);
                PMPI_Recv(&i,1,MPI_INT,task.intra_left,0,MPI_COMM_WORLD,s);
            } else {
                PMPI_Recv(&i,1,MPI_INT,task.intra_left,0,MPI_COMM_WORLD,s);
                PMPI_Send(&i,1,MPI_INT,task.intra_right,0,MPI_COMM_WORLD);
            }
        }

        if(task.intra_root == task.mpi_rank) {
            if(job.nhosts > 1 && task.mpi_rank) {
                PMPI_Recv(&i,1,MPI_INT,task.inter_left,0,MPI_COMM_WORLD,s);
            }
            /* sh -c lacks PATH on some systems, so remove popen & system where possible */

            sprintf(tmp_cmd, "/usr/bin/cat /tmp/%s.%s.* >> %s",
                    job.username,
                    job.cookie,
                    job.syslog_fname);
            system(tmp_cmd);

            chmod(job.syslog_fname, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
            sprintf(tmp_cmd, "/usr/bin/rm -f /tmp/%s.%s.* ",
                    job.username,
                    job.cookie);
            system(tmp_cmd);

            /* ugh! duplicating cat and rm is yucky
               sprintf(tmp_pref,"%s.%s", job.username, job.cookie);
               dp=opendir("/tmp");
               if(dp) {
               while(de=readdir(dp))!=NULL){
               if(!strncmp(de->d_name,tmp_pref, strlen(tmp_fname))) {
               sprintf(tmp_fname,"/tmp/%s", de->d_name);
               fopen(tmp_fname,"r"
               read in pieces and write them to the intra-node file
               delete the .rank file
               }
               }
               }
            */

            if(job.nhosts > 1 && task.inter_right != 0) {
                PMPI_Send(&i,1,MPI_INT,task.inter_right,0,MPI_COMM_WORLD);
            }


        }
        return;
    }
#endif
    /* } */

    /*
    ** Aggregation method #4 : Single File    - MPI  {
    */
#ifdef IPM_LOG_USEMPI

    if (task.flags & PARALLEL_IO_LOG ) {
        int buff_size=0;
        MPI_Offset file_offset=0;
        int64_t buff_sum=0;
        int malloc_flag,malloc_sum;
        char* buffer=NULL;
        MPI_Info info;


        /* measure size of buff required */
        buff_size=ipm_log_write(&job, &task, txt, buffer,0,1);

        malloc_flag=1;
        buffer = (char*)malloc(buff_size+1);
        if (buffer == NULL) {
            malloc_flag=0;
        } else {
            rv=ipm_log_write(&job, &task, txt, buffer,buff_size,1);
        }

        /* see whether malloc succeeded across all MPI tasks */
        PMPI_Allreduce(&malloc_flag,&malloc_sum,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
        if (malloc_sum == task.mpi_size)  {/* use parallel IO */

            if(task.flags & DEBUG && !task.mpi_rank) {
                printf("IPM: %d IPM report parallel IO used\n", task.mpi_rank);
            }

            PMPI_Info_create(&info);
#ifndef CRAY_GPFS_BUG
            PMPI_Info_set(info,"access_style","write_once");
            PMPI_Info_set(info,"collective_buffering","true");
            PMPI_Info_set(info,"file_perm","0644");
            PMPI_Info_set(info,"romio_cb_read","true");
            PMPI_Info_set(info,"cb_align","2");
            PMPI_Info_set(info,"romio_cb_write","true");
            PMPI_Info_set(info,"cb_config_list","*:1");
            PMPI_Info_set(info,"striping_factor","80");

            PMPI_Info_set(info,"IBM_largeblock_io","true");
#endif

            /* with allowing the user to choose the filename - can overwrite an old */
            /* file - which would be fine if MPI-IO allowed TRUNC - but it doesn't */
            /* so we just delete so that we don't end up with trailing garbage  */
            if (!task.mpi_rank) rv=PMPI_File_delete ( job.log_fname,MPI_INFO_NULL);
            rv=PMPI_Barrier(MPI_COMM_WORLD);

            rv = PMPI_File_open( MPI_COMM_WORLD, job.log_fname, MPI_MODE_WRONLY | MPI_MODE_CREATE,info,  &ipm_mpiio_log_fh );
            if (rv)
            {
                printf("IPM: %d syslog fopen failed fname=%s \n",
                       task.mpi_rank,
                       job.log_fname);
                fflush(stdout);
                return;
            }
            /* workaround for cases when MPI_INTEGER8 is not defined */
#ifndef MPI_INTEGER8
#define MPI_INTEGER8 MPI_LONG_LONG_INT
#endif

            if (task.mpi_size > 1) {
                if (task.mpi_rank == 0) {
                    buff_sum+=(int64_t)buff_size;
                    PMPI_Send (&buff_sum,1,MPI_INTEGER8,1,0,MPI_COMM_WORLD);
                    file_offset=0;
                } else if (task.mpi_rank == (task.mpi_size-1)) {
                    PMPI_Recv (&buff_sum,1,MPI_INTEGER8,task.mpi_rank-1,0,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
                    file_offset=(MPI_Offset)buff_sum;
                } else {
                    PMPI_Recv (&buff_sum,1,MPI_INTEGER8,task.mpi_rank-1,0,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
                    file_offset=(MPI_Offset)buff_sum;
                    buff_sum+=(int64_t)buff_size;
                    PMPI_Send (&buff_sum,1,MPI_INTEGER8,task.mpi_rank+1,0,MPI_COMM_WORLD);
                }
            }

            rv=PMPI_File_set_view(ipm_mpiio_log_fh,file_offset,MPI_CHAR, MPI_CHAR,"native",info);

            /*write info*/
            rv=PMPI_File_write_all(ipm_mpiio_log_fh,buffer,buff_size,MPI_CHAR,MPI_STATUS_IGNORE);


            rv = PMPI_File_close( &ipm_mpiio_log_fh );
            PMPI_Barrier(MPI_COMM_WORLD);
            /* In some MPI-IO implementations (Cray), permissions are not settable with hints */
            if (task.mpi_rank == 0) chmod (job.log_fname,0744);
            free (buffer);
            return;
        } else {
            if (! task.mpi_rank) printf("IPM: %d Allocation of IO Buffer failed on one or more tasks\n",task.mpi_rank);
        }
    }
    /*parallel IO failed */
    if (! task.mpi_rank) printf("IPM: %d Using serial IO\n",task.mpi_rank);

    /*************************************/
    /* write log from rank zero using MPI*/
    /*************************************/
    if(task.mpi_rank==0) {
        ipm_mpi_log_fh = fopen(job.log_fname,"w+");
        if(ipm_mpi_log_fh == NULL) {
            printf("IPM: %d syslog fopen failed fname=%s \n",
                   task.mpi_rank,
                   job.log_fname);
            fflush(stdout);
        }
        chmod(job.log_fname, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);

        ipm_log_write(&job, &task, txt,ipm_mpi_log_fh ,0,0);
        /* we now pollute the local profile state irrevocably, in the interest
           of keeping a memory footprint that is constant, independent of
           concurrency */

        /* task 0 initiates a volley of Sends via a handshake */
        for(i=1; i<job.ntasks; i++) {
            PMPI_Send(&token,1,MPI_INT,i,0,MPI_COMM_WORLD);
            PMPI_Recv(&job,sizeof(struct ipm_jobdata),MPI_BYTE,i,0,MPI_COMM_WORLD,s+0);
            PMPI_Recv(&task,sizeof(struct ipm_taskdata),MPI_BYTE,i,1,MPI_COMM_WORLD,s+1);
            PMPI_Recv(&(txt[0]),MAXSIZE_TXTLINE,MPI_CHAR,i,1,MPI_COMM_WORLD,s+1);

            ipm_log_write(&job, &task, txt,ipm_mpi_log_fh ,0,0);
        }
        fclose(ipm_mpi_log_fh);
    } else {
        PMPI_Recv(&token,1,MPI_INT,0,0,MPI_COMM_WORLD,s+0);
        PMPI_Send(&job,sizeof(struct ipm_jobdata),MPI_BYTE,0,0,MPI_COMM_WORLD);
        PMPI_Send(&task,sizeof(struct ipm_taskdata),MPI_BYTE,0,1,MPI_COMM_WORLD);
        PMPI_Send(&(txt[0]),MAXSIZE_TXTLINE,MPI_CHAR,0,1,MPI_COMM_WORLD);
    }
    PMPI_Barrier(MPI_COMM_WORLD);

    return;
#endif

    return;
}
Example #7
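/* Merges per-task callsite statistics at the mpiP collector rank: an MPI_SUM
   Allreduce checks that any records exist and an MPI_MAX Reduce sizes the
   collector's receive buffer; non-collector ranks pack and send their
   records, and the collector (after opening the executable for BFD or
   libdwarf source lookup) inserts every record into the global hash tables.
   A final MPI_MIN Allreduce lets all ranks agree on success or failure. */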
int
mpiPi_mergeResults ()
{
  int ac;
  callsite_stats_t **av;
  int totalCount = 0;
  int maxCount = 0;
  int retval = 1, sendval;

  /* gather local task data */
  h_gather_data (mpiPi.task_callsite_stats, &ac, (void ***) &av);

  /* determine size of space necessary on collector */
  PMPI_Allreduce (&ac, &totalCount, 1, MPI_INT, MPI_SUM, mpiPi.comm);
  PMPI_Reduce (&ac, &maxCount, 1, MPI_INT, MPI_MAX, mpiPi.collectorRank,
	       mpiPi.comm);

  if (totalCount < 1)
    {
      mpiPi_msg_warn
	("Collector found no records to merge. Omitting report.\n");
      return 0;
    }

  /* gather global data at collector */
  if (mpiPi.rank == mpiPi.collectorRank)
    {
      int i;
      int ndx = 0;

#ifdef ENABLE_BFD
      if (mpiPi.appFullName != NULL)
	{
	  if (open_bfd_executable (mpiPi.appFullName) == 0)
	    mpiPi.do_lookup = 0;
	}
#elif defined(USE_LIBDWARF)
      if (mpiPi.appFullName != NULL)
	{
	  if (open_dwarf_executable (mpiPi.appFullName) == 0)
	    mpiPi.do_lookup = 0;
	}
#endif
#if defined(ENABLE_BFD) || defined(USE_LIBDWARF)
      else
	{
	  mpiPi_msg_warn ("Failed to open executable\n");
	  mpiPi.do_lookup = 0;
	}
#endif
      /* convert data to src line; merge, if nec */
      mpiPi.global_callsite_stats = h_open (mpiPi.tableSize,
					    mpiPi_callsite_stats_src_hashkey,
					    mpiPi_callsite_stats_src_comparator);
      mpiPi.global_callsite_stats_agg = h_open (mpiPi.tableSize,
						mpiPi_callsite_stats_src_id_hashkey,
						mpiPi_callsite_stats_src_id_comparator);
      if (callsite_pc_cache == NULL)
	{
	  callsite_pc_cache = h_open (mpiPi.tableSize,
				      callsite_pc_cache_hashkey,
				      callsite_pc_cache_comparator);
	}
      if (callsite_src_id_cache == NULL)
	{
	  callsite_src_id_cache = h_open (mpiPi.tableSize,
					  callsite_src_id_cache_hashkey,
					  callsite_src_id_cache_comparator);
	}
      /* Try to allocate space for max count of callsite info from all tasks  */
      mpiPi.rawCallsiteData =
	(callsite_stats_t *) calloc (maxCount, sizeof (callsite_stats_t));
      if (mpiPi.rawCallsiteData == NULL)
	{
	  mpiPi_msg_warn
	    ("Failed to allocate memory to collect callsite info");
	  retval = 0;
	}

      /* Clear global_mpi_time and global_mpi_size before accumulation in mpiPi_insert_callsite_records */
      mpiPi.global_mpi_time = 0.0;
      mpiPi.global_mpi_size = 0.0;

      if (retval == 1)
	{
	  /* Insert collector callsite data into global and task-specific hash tables */
	  for (ndx = 0; ndx < ac; ndx++)
	    {
	      mpiPi_insert_callsite_records (av[ndx]);
	    }
	  ndx = 0;
	  for (i = 1; i < mpiPi.size; i++)	/* n-1 */
	    {
	      MPI_Status status;
	      int count;
	      int j;

	      /* okay in any order */
	      PMPI_Probe (MPI_ANY_SOURCE, mpiPi.tag, mpiPi.comm, &status);
	      PMPI_Get_count (&status, MPI_CHAR, &count);
	      PMPI_Recv (&(mpiPi.rawCallsiteData[ndx]), count, MPI_CHAR,
			 status.MPI_SOURCE, mpiPi.tag, mpiPi.comm, &status);
	      count /= sizeof (callsite_stats_t);


	      for (j = 0; j < count; j++)
		{
		  mpiPi_insert_callsite_records (&(mpiPi.rawCallsiteData[j]));
		}
	    }
	  free (mpiPi.rawCallsiteData);
	}
    }
  else
    {
      int ndx;
      char *sbuf = (char *) malloc (ac * sizeof (callsite_stats_t));
      for (ndx = 0; ndx < ac; ndx++)
	{
	  bcopy (av[ndx],
		 &(sbuf[ndx * sizeof (callsite_stats_t)]),
		 sizeof (callsite_stats_t));
	}
      PMPI_Send (sbuf, ac * sizeof (callsite_stats_t),
		 MPI_CHAR, mpiPi.collectorRank, mpiPi.tag, mpiPi.comm);
      free (sbuf);
    }
  if (mpiPi.rank == mpiPi.collectorRank && retval == 1)
    {
      if (mpiPi.collective_report == 0)
	mpiPi_msg_debug
	  ("MEMORY : Allocated for global_callsite_stats     : %13ld\n",
	   h_count (mpiPi.global_callsite_stats) * sizeof (callsite_stats_t));
      mpiPi_msg_debug
	("MEMORY : Allocated for global_callsite_stats_agg : %13ld\n",
	 h_count (mpiPi.global_callsite_stats_agg) *
	 sizeof (callsite_stats_t));
    }

  /* TODO: need to free all these pointers as well. */
  free (av);

  if (mpiPi.rank == mpiPi.collectorRank)
    {
      if (mpiPi.do_lookup == 1)
	{
#ifdef ENABLE_BFD
	  /* clean up */
	  close_bfd_executable ();
#elif defined(USE_LIBDWARF)
	  close_dwarf_executable ();
#endif
	}
    }

  /*  Quadrics MPI does not appear to support MPI_IN_PLACE   */
  sendval = retval;
  PMPI_Allreduce (&sendval, &retval, 1, MPI_INT, MPI_MIN, mpiPi.comm);
  return retval;
}
Example #8
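/* Minimal profiling-interface wrapper: intercepts MPI_Allreduce and forwards
   it unchanged to PMPI_Allreduce. */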
int MPI_Allreduce(void *sendbuf, void *recvbuf, int count,
                  MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
  return PMPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
}
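A minimal sketch, not taken from any of the projects above, of how a pass-through wrapper like Example #8 is commonly extended into a profiling wrapper by timing the underlying PMPI call; the accumulator name is illustrative only:

#include <mpi.h>

/* Hypothetical accumulator, not from the examples above. */
static double total_allreduce_seconds = 0.0;

/* The signature must match the MPI_Allreduce prototype in the mpi.h being
   used; MPI-3 and later declare sendbuf as const void *, older MPIs (as in
   Example #8) omit the const. */
int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count,
                  MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
  /* Time the underlying call using the profiling entry points. */
  double t0 = PMPI_Wtime();
  int rc = PMPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
  total_allreduce_seconds += PMPI_Wtime() - t0;
  return rc;
}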