Example #1
0
/*
Major reconstruction of memory management for -off_cache flag
*/
void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS,
                           struct Bench* Bmark, MODES BMODE, int iter, int size)
/*


                      Prepares one step of the message size loop:
                      - chooses the initial repetition count (ITERATIONS->n_sample)
                      - computes send / receive buffer lengths from the benchmark name
                      - sets up the -off_cache offsets and cache iteration counts
                      - allocates / initializes the buffers
                        (IMB_alloc_buf, IMB_set_buf, IMB_init_transfer)
                      - runs the benchmark once (or twice when the iteration
                        count is adapted dynamically) before returning


Input variables:


-Bmark                (type struct Bench*)
                      (For explanation of struct Bench type:
                      describes all aspects of modes of a benchmark;
                      see [1] for more information)

                      Current benchmark.
                      Bmark->sample_failure is reset to 0 and set to
                      SAMPLE_FAILED_INT_OVERFLOW or SAMPLE_FAILED_MEMORY when
                      the function gives up early.

-BMODE                (type MODES)
                      aggregate / non aggregate

-iter                 (type int)
                      number of current iteration of message size loop;
                      used as index into ITERATIONS->numiters and c_info->msglen

-size                 (type int)
                      Message size


In/out variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

                      Communications buffers are allocated and assigned values.
                      c_info->used_mem is updated.

-ITERATIONS           (type struct iter_schedule*)
                      Adaptive number of iterations, out of cache scheduling are
                      setup if requested


Early exits (no buffers touched, no benchmark run):
                      - c_info->rank < 0
                      - a "...v" collective whose buffer length exceeds INT_MAX
                      - required memory exceeds c_info->max_mem

*/
/* >> IMB 3.1  */
{
    /* IMB 3.1 << */
    size_t s_len, r_len, s_alloc, r_alloc;
    int init_size, irep, i_s, i_r, x_sample;
    const int root_based = has_root(Bmark->name);


    x_sample = BMODE->AGGREGATE ? ITERATIONS->msgspersample : ITERATIONS->msgs_nonaggr;

    /* July 2002 fix V2.2.1: */
    /* "defined RMA" (instead of a bare RMA) keeps the condition well formed
       even when RMA is defined without a value. */
#if (defined EXT || defined MPIIO || defined RMA)
    if( Bmark->access==no ) x_sample=ITERATIONS->msgs_nonaggr;
#endif

    /* Limit the repetition count by the overall data volume, but run at least once. */
    ITERATIONS->n_sample = (size > 0)
                           ? max(1, min(ITERATIONS->overall_vol / size, x_sample))
                           : x_sample;

    Bmark->sample_failure = 0;

    init_size = max(size, asize);

    if (c_info->rank < 0) {
        return;
    } else {

        if (ITERATIONS->iter_policy == imode_off) {
            ITERATIONS->n_sample = x_sample = ITERATIONS->msgspersample;
        } else if ((ITERATIONS->iter_policy == imode_multiple_np) || (ITERATIONS->iter_policy == imode_auto && root_based)) {
            /* n_sample for benchmarks with uneven distribution of works
               must be greater or equal and multiple to num_procs.
               The formula below is a negative leg of hyperbola.
               It's moved and scaled relative to max message size
               and initial n_sample subject to multiple to num_procs.
            */
            double d_n_sample = ITERATIONS->msgspersample;
            int max_msg_size = 1<<c_info->max_msg_log;
            /* The denominator is evaluated in double: num_procs*init_size can
               exceed INT_MAX, and signed int overflow is undefined behavior. */
            double denom = (double)c_info->num_procs * (double)init_size + (double)max_msg_size;
            int tmp = (int)(d_n_sample*max_msg_size/denom+0.5);
            ITERATIONS->n_sample = x_sample = max(tmp-tmp%c_info->num_procs, c_info->num_procs);
        } /* else as is */
    }

    /* Buffer lengths depend on the communication pattern of the benchmark. */
    if (
#ifdef MPI1
        !strcmp(Bmark->name,"Alltoall") || !strcmp(Bmark->name,"Alltoallv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name, "Ialltoall")  || !strcmp(Bmark->name, "Ialltoall_pure")
        || !strcmp(Bmark->name, "Ialltoallv") || !strcmp(Bmark->name, "Ialltoallv_pure")
#else
        0
#endif // NBC // MPI1
    )
    {
        s_len = (size_t)c_info->num_procs * (size_t)init_size;
        r_len = (size_t)c_info->num_procs * (size_t)init_size;
    }
    else if (
#ifdef MPI1
        !strcmp(Bmark->name, "Allgather")   || !strcmp(Bmark->name, "Allgatherv")
        || !strcmp(Bmark->name, "Gather")      || !strcmp(Bmark->name, "Gatherv")
#elif defined NBC
        !strcmp(Bmark->name, "Iallgather")  || !strcmp(Bmark->name, "Iallgather_pure")
        || !strcmp(Bmark->name, "Iallgatherv") || !strcmp(Bmark->name, "Iallgatherv_pure")
        || !strcmp(Bmark->name, "Igather")     || !strcmp(Bmark->name, "Igather_pure")
        || !strcmp(Bmark->name, "Igatherv")    || !strcmp(Bmark->name, "Igatherv_pure")
#else // MPI1 // NBC
        0
#endif // MPI1 // NBC
    )
    {
        s_len = (size_t) init_size;
        r_len = (size_t) c_info->num_procs * (size_t)init_size;
    }
    else if( !strcmp(Bmark->name,"Exchange") )
    {
        s_len = 2 * (size_t)init_size;
        r_len = (size_t) init_size;
    }
    else if(
#ifdef MPI1
        !strcmp(Bmark->name,"Scatter") || !strcmp(Bmark->name,"Scatterv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name,"Iscatter")  || !strcmp(Bmark->name,"Iscatter_pure")
        || !strcmp(Bmark->name,"Iscatterv") || !strcmp(Bmark->name,"Iscatterv_pure")
#else // NBC // MPI1
        0
#endif // NBC // MPI1
    )
    {
        s_len = (size_t)c_info->num_procs * (size_t)init_size;
        r_len = (size_t)init_size;
    } else if( !strcmp(Bmark->name,"Barrier") || /*!strcmp(Bmark->name,"Window") ||*/ !strcmp(Bmark->name,"Open_Close") ) {
        s_len = r_len = 0;
    }
    else if ( ! strcmp(Bmark->name,"Exchange_put") || ! strcmp(Bmark->name,"Exchange_get") )
    {
        s_len = 2 * (size_t)init_size;
        r_len = 2 * (size_t)init_size;
    }
    else if (! strcmp(Bmark->name,"Compare_and_swap") )
    {
        /* Compare_and_swap operations require 3 buffers, so allocate space for compare
         * buffers in our r_buffer */
        s_len = (size_t)init_size;
        r_len = 3 * (size_t)init_size;
    }
    else
    {
        s_len = r_len = (size_t) init_size;
    }

    /*===============================================*/
    /* the displ is declared as int by MPI1 standard
       If c_info->num_procs*init_size  exceed INT_MAX value there is no way to run this sample
     */
    if (
#ifdef MPI1
        !strcmp(Bmark->name,"Alltoallv")  ||
        !strcmp(Bmark->name,"Allgatherv") ||
        !strcmp(Bmark->name,"Scatterv")   ||
        !strcmp(Bmark->name,"Gatherv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name,"Ialltoallv")  || !strcmp(Bmark->name,"Ialltoallv_pure")  ||
        !strcmp(Bmark->name,"Iallgatherv") || !strcmp(Bmark->name,"Iallgatherv_pure") ||
        !strcmp(Bmark->name,"Iscatterv")   || !strcmp(Bmark->name,"Iscatterv_pure")   ||
        !strcmp(Bmark->name,"Igatherv")    || !strcmp(Bmark->name,"Igatherv_pure")
#else // NBC // MPI1
        0
#endif // NBC // MPI1
    )
    {
        if( s_len > INT_MAX || r_len > INT_MAX) {
            Bmark->sample_failure = SAMPLE_FAILED_INT_OVERFLOW;
            return;
        }
    }
    /*===============================================*/

    /* IMB 3.1: new memory management for -off_cache */
    if (BMODE->type == Sync) {
        ITERATIONS->use_off_cache=0;
        ITERATIONS->n_sample=x_sample;
    } else {
#ifdef MPIIO
        ITERATIONS->use_off_cache=0;
#else
        ITERATIONS->use_off_cache = ITERATIONS->off_cache;
#endif
        if (ITERATIONS->off_cache) {
            if ( ITERATIONS->cache_size > 0) {
                /* Offset between consecutive buffer copies: the buffer length
                   rounded up to a cache line multiple, plus one extra line. */
                size_t cls = (size_t) ITERATIONS->cache_line_size;
                size_t ofs = ( (s_len + cls - 1) / cls + 1 ) * cls;
                ITERATIONS->s_offs = ofs;
                ITERATIONS->s_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
                ofs = ( ( r_len + cls -1 )/cls + 1 )*cls;
                ITERATIONS->r_offs = ofs;
                ITERATIONS->r_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
            } else {
                ITERATIONS->s_offs=ITERATIONS->r_offs=0;
                ITERATIONS->s_cache_iter=ITERATIONS->r_cache_iter=1;
            }
        }
    }

#ifdef MPIIO
    s_alloc = s_len;
    r_alloc = r_len;
#else
    if( ITERATIONS->use_off_cache ) {
        s_alloc = max(s_len,ITERATIONS->s_cache_iter*ITERATIONS->s_offs);
        r_alloc = max(r_len,ITERATIONS->r_cache_iter*ITERATIONS->r_offs);
    } else {
        s_alloc = s_len;
        r_alloc = r_len;
    }
#endif

    c_info->used_mem = 1.f*(s_alloc+r_alloc)/MEM_UNIT;

#ifdef DEBUG
    {
        size_t mx, mu;

        mx = (size_t) MEM_UNIT*c_info->max_mem;
        mu = (size_t) MEM_UNIT*c_info->used_mem;

        DBG_I3("Got send / recv lengths; iters ",s_len,r_len,ITERATIONS->n_sample);
        DBG_I2("max  / used memory ",mx,mu);
        DBG_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBG_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBG_I2("send / recv buffer allocations ",s_alloc, r_alloc);
        DBGF_I2("Got send / recv lengths ",s_len,r_len);
        DBGF_I2("max  / used memory ",mx,mu);
        DBGF_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBGF_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBGF_I2("send / recv buffer allocations ",s_alloc, r_alloc);
    }
#endif

    if( c_info->used_mem > c_info->max_mem ) {
        Bmark->sample_failure=SAMPLE_FAILED_MEMORY;
        return;
    }

    if (s_alloc > 0  && r_alloc > 0) {
        if (ITERATIONS->use_off_cache) {
            IMB_alloc_buf(c_info, "IMB_init_buffers_iter 1", s_alloc, r_alloc);
            IMB_set_buf(c_info, c_info->rank, 0, s_len-1, 0, r_len-1);

            /* Replicate the initialized first copy into every cache slot. */
            for (irep = 1; irep < ITERATIONS->s_cache_iter; irep++) {
                i_s = irep % ITERATIONS->s_cache_iter;
                memcpy((void*)((char*)c_info->s_buffer + i_s * ITERATIONS->s_offs), c_info->s_buffer, s_len);
            }

            for (irep = 1; irep < ITERATIONS->r_cache_iter; irep++) {
                i_r = irep % ITERATIONS->r_cache_iter;
                memcpy((void*)((char*)c_info->r_buffer + i_r * ITERATIONS->r_offs), c_info->r_buffer, r_len);
            }
        } else {
            IMB_set_buf(c_info, c_info->rank, 0, s_alloc-1, 0, r_alloc-1);
        }
    }

    IMB_init_transfer(c_info, Bmark, size, (MPI_Aint) max(s_alloc, r_alloc));

    /* Determine #iterations if dynamic adaptation requested */
    if ((ITERATIONS->iter_policy == imode_dynamic) || (ITERATIONS->iter_policy == imode_auto && !root_based)) {
        double time[MAX_TIME_ID];
        int acc_rep_test, t_sample;
        int selected_n_sample = ITERATIONS->n_sample;

        /* memset counts bytes: clear the whole array, not just MAX_TIME_ID bytes. */
        memset(time, 0, sizeof time);
        if (iter == 0 || BMODE->type == Sync) {
            ITERATIONS->n_sample_prev = ITERATIONS->msgspersample;
            if (c_info->n_lens > 0) {
                /* Clear all n_lens entries (element count times element size). */
                memset(ITERATIONS->numiters, 0,
                       (size_t)c_info->n_lens * sizeof *ITERATIONS->numiters);
            }
        }

        /* first, run 1 iteration only */
        ITERATIONS->n_sample=1;
#ifdef MPI1
        c_info->select_source = Bmark->select_source;
#endif
        Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);

        time[1] = time[0];

#ifdef MPIIO
        if( Bmark->access != no) {
            ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
            MPI_ERRHAND(ierr);

            if( Bmark->fpointer == shared) {
                ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
                MPI_ERRHAND(ierr);
            }
        }
#endif /*MPIIO*/

        /* Use the slowest rank's time as the reference. */
        MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);

        {   /* determine rough #repetitions for a run time of 1 sec */
            int rep_test = 1;
            if (time[0] < (1.0 / MSGSPERSAMPLE)) {
                rep_test = MSGSPERSAMPLE;
            } else if ((time[0] < 1.0)) {
                rep_test = (int)(1.0 / time[0] + 0.5);
            }

            MPI_Allreduce(&rep_test, &acc_rep_test, 1, MPI_INT, MPI_MAX, c_info->communicator);
        }

        ITERATIONS->n_sample = min(selected_n_sample, acc_rep_test);

        /* Second, longer test run to get a more reliable time. */
        if (ITERATIONS->n_sample > 1) {
#ifdef MPI1
            c_info->select_source = Bmark->select_source;
#endif
            Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
            time[1] = time[0];
#ifdef MPIIO
            if( Bmark->access != no) {
                ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
                MPI_ERRHAND(ierr);

                if ( Bmark->fpointer == shared) {
                    ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
                    MPI_ERRHAND(ierr);
                }
            }
#endif /*MPIIO*/

            MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);
        }

        {
            /* (float)0x7fffffff rounds up to 2^31, which does not fit into an
               int; the strict comparison keeps the conversion in range. */
            float val = (float) (1+ITERATIONS->secs/time[0]);
            t_sample = (time[0] > 1.e-8 && (val < (float) 0x7fffffff))
                       ? (int)val
                       : selected_n_sample;
        }

        if (c_info->n_lens>0 && BMODE->type != Sync) {
            // check monotonicity with msg sizes
            int i;
            for (i = 0; i < iter; i++) {
                t_sample = ( c_info->msglen[i] < size )
                           ? min(t_sample,ITERATIONS->numiters[i])
                           : max(t_sample,ITERATIONS->numiters[i]);
            }
            ITERATIONS->n_sample = ITERATIONS->numiters[iter] = min(selected_n_sample, t_sample);
        } else {
            ITERATIONS->n_sample = min(selected_n_sample,
                                       min(ITERATIONS->n_sample_prev, t_sample));
        }

        /* All ranks adopt the count chosen by rank 0 of the communicator. */
        MPI_Bcast(&ITERATIONS->n_sample, 1, MPI_INT, 0, c_info->communicator);

#ifdef DEBUG
        {
            /* time is an array; report the reduced time of the last test run. */
            int usec=(int)(time[0]*1000000);

            DBGF_I2("Checked time with #iters / usec ",acc_rep_test,usec);
            DBGF_I1("=> # samples, aligned with previous ",t_sample);
            DBGF_I1("final #samples ",ITERATIONS->n_sample);
        }
#endif
    } else { /* no dynamic adaptation: single run with the preselected n_sample */
        double time[MAX_TIME_ID];
        Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
    }

    ITERATIONS->n_sample_prev=ITERATIONS->n_sample;

    /* >> IMB 3.1  */

}
Example #2
0
/*
Major reconstruction of memory management for -off_cache flag
*/
void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS, 
                           struct Bench* Bmark, MODES BMODE, int iter, int size)
/*


                      Prepares one step of the message size loop:
                      - chooses the initial repetition count (ITERATIONS->n_sample)
                      - computes send / receive buffer lengths from the benchmark name
                      - sets up the -off_cache offsets and cache iteration counts
                      - allocates / initializes the buffers
                        (IMB_alloc_buf, IMB_set_buf, IMB_init_transfer)
                      - if ITERATIONS->iter_dyn is set, runs the benchmark once or
                        twice to adapt the repetition count


Input variables: 


-Bmark                (type struct Bench*)                      
                      (For explanation of struct Bench type:
                      describes all aspects of modes of a benchmark;
                      see [1] for more information)

                      Current benchmark.
                      Bmark->sample_failure is reset to 0 and set to
                      SAMPLE_FAILED_INT_OVERFLOW or SAMPLE_FAILED_MEMORY when
                      the sample cannot be run.

-BMODE                (type MODES)
                      aggregate / non aggregate

-iter                 (type int)
                      number of current iteration of message size loop;
                      used as index into ITERATIONS->numiters and c_info->msglen

-size                 (type int)                      
                      Message size


In/out variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information

                      Communications buffers are allocated and assigned values.
                      c_info->used_mem is updated.

-ITERATIONS           (type struct iter_schedule*)
                      Adaptive number of iterations, out of cache scheduling are
                      setup if requested


Early exits (no buffers touched, no benchmark run):
                      - c_info->rank < 0
                      - a "...v" collective whose buffer length exceeds INT_MAX
                      - required memory exceeds c_info->max_mem*MEM_UNIT

*/
/* >> IMB 3.1  */
{
/* IMB 3.1 << */
    size_t s_len, r_len, s_alloc, r_alloc;
    int init_size, irep, i_s, i_r, x_sample;

    x_sample = BMODE->AGGREGATE ? ITERATIONS->msgspersample : ITERATIONS->msgs_nonaggr;

    /* July 2002 fix V2.2.1: */
#if (defined EXT || defined MPIIO)
    if( Bmark->access==no ) x_sample=ITERATIONS->msgs_nonaggr;
#endif

    /* Limit the repetition count by the overall data volume, but run at least once. */
    if ( size>0 )
        ITERATIONS->n_sample =max(1,min(ITERATIONS->overall_vol/size,x_sample));
    else ITERATIONS->n_sample = x_sample ;

    Bmark->sample_failure=0;

    init_size = max(size,asize);

    if(c_info->rank < 0) return;

    /* Buffer lengths depend on the communication pattern of the benchmark. */
    if(!strcmp(Bmark->name,"Alltoall") || !strcmp(Bmark->name,"Alltoallv"))
    {
        s_len = (size_t) c_info->num_procs*init_size;
        r_len = (size_t) c_info->num_procs*init_size;
    }
    else if( !strcmp(Bmark->name,"Allgather") || !strcmp(Bmark->name,"Allgatherv")
             ||!strcmp(Bmark->name,"Gather") || !strcmp(Bmark->name,"Gatherv") )
    {
        s_len = (size_t) init_size;
        r_len = (size_t) c_info->num_procs*init_size;
    }
    else if( !strcmp(Bmark->name,"Exchange") )
    {
        s_len = (size_t) 2*init_size;
        r_len = (size_t) init_size;
    }
    else if( !strcmp(Bmark->name,"Scatter") || !strcmp(Bmark->name,"Scatterv") )
    {
        s_len = (size_t) c_info->num_procs*init_size;
        r_len = (size_t) init_size;
    }
    else if( !strcmp(Bmark->name,"Barrier") || /*!strcmp(Bmark->name,"Window") ||*/ !strcmp(Bmark->name,"Open_Close") )
    {
        s_len = r_len = 0;
    }
    else
        s_len = r_len = (size_t) init_size;


    /*===============================================*/
    /* the displ is declared as int by MPI1 standard
       If c_info->num_procs*init_size  exceed INT_MAX value there is no way to run this sample
     */
    if( !strcmp(Bmark->name,"Alltoallv")  || 
        !strcmp(Bmark->name,"Allgatherv") ||
        !strcmp(Bmark->name,"Scatterv")   ||
        !strcmp(Bmark->name,"Gatherv"))
    {
        if( s_len > INT_MAX || r_len > INT_MAX)
        {
            Bmark->sample_failure=SAMPLE_FAILED_INT_OVERFLOW;
            return;
        }
    }
    /*===============================================*/

    /* IMB 3.1: new memory management for -off_cache */
    if( BMODE->type == Sync ) 
    {
        ITERATIONS->use_off_cache=0;
        ITERATIONS->n_sample=x_sample;
    }
    else
    {
#ifdef MPIIO
        ITERATIONS->use_off_cache=0;
#else  
        ITERATIONS->use_off_cache=ITERATIONS->off_cache;
#endif  

        if( ITERATIONS->off_cache ) 
        {

            if( ITERATIONS->cache_size>0 )
            {
                /* Offset between consecutive buffer copies: the buffer length
                   rounded up to a cache line multiple, plus one extra line. */
                size_t cls = (size_t) ITERATIONS->cache_line_size;
                size_t ofs;

                ofs = ( ( s_len + cls -1 )/cls + 1 )*cls;
                ITERATIONS->s_offs = ofs;
                ITERATIONS->s_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
                ofs = ( ( r_len + cls -1 )/cls + 1 )*cls;
                ITERATIONS->r_offs = ofs;
                ITERATIONS->r_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
            }
            else
            {
                ITERATIONS->s_offs=ITERATIONS->r_offs=0;
                ITERATIONS->s_cache_iter=ITERATIONS->r_cache_iter=1;
            }

        }

    }

#ifdef MPIIO
    s_alloc = s_len;
    r_alloc = r_len;
#else
    if( ITERATIONS->use_off_cache ) 
    {
        s_alloc = max(s_len,ITERATIONS->s_cache_iter*ITERATIONS->s_offs);
        r_alloc = max(r_len,ITERATIONS->r_cache_iter*ITERATIONS->r_offs);
    }
    else
    {
        s_alloc = s_len;
        r_alloc = r_len;
    }
#endif

    c_info->used_mem = 1.f*(s_alloc+r_alloc)/MEM_UNIT;

#ifdef DEBUG 
    {
        size_t mx, mu;

        mx = (size_t) MEM_UNIT*c_info->max_mem;
        mu = (size_t) MEM_UNIT*c_info->used_mem;

        DBG_I3("Got send / recv lengths; iters ",s_len,r_len,ITERATIONS->n_sample);
        DBG_I2("max  / used memory ",mx,mu);
        DBG_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBG_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter); 
        DBG_I2("send / recv buffer allocations ",s_alloc, r_alloc);
        DBGF_I2("Got send / recv lengths ",s_len,r_len);
        DBGF_I2("max  / used memory ",mx,mu);
        DBGF_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBGF_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter); 
        DBGF_I2("send / recv buffer allocations ",s_alloc, r_alloc);
    }
#endif

    if( s_alloc + r_alloc > c_info->max_mem*MEM_UNIT )
    {
        Bmark->sample_failure=SAMPLE_FAILED_MEMORY;
    }
    else 
    {

        if( ITERATIONS->use_off_cache )
        {

            if( s_alloc > 0  && r_alloc > 0)
            {
                IMB_alloc_buf(c_info, "IMB_init_buffers_iter 1", s_alloc, r_alloc);
                IMB_set_buf(c_info, c_info->rank, 0, s_len-1, 0, r_len-1);

                /* Replicate the initialized first copy into every cache slot. */
                for( irep=1; irep<ITERATIONS->s_cache_iter; irep++)
                {
                    i_s=irep%ITERATIONS->s_cache_iter;
                    memcpy((void*)((char*)c_info->s_buffer+i_s*ITERATIONS->s_offs),c_info->s_buffer, s_len);
                }

                for( irep=1; irep<ITERATIONS->r_cache_iter; irep++)
                {
                    i_r=irep%ITERATIONS->r_cache_iter;
                    memcpy((void*)((char*)c_info->r_buffer+i_r*ITERATIONS->r_offs),c_info->r_buffer, r_len);
                }

            }
        }
        else
        {
            if( s_alloc > 0  && r_alloc > 0)
            {
                IMB_set_buf(c_info, c_info->rank, 0, s_alloc-1, 0, r_alloc-1);
            }

        }

        IMB_init_transfer(c_info, Bmark, size, (MPI_Aint) max(s_alloc, r_alloc));

        /* Determine #iterations if dynamic adaptation requested */
        if( ITERATIONS->iter_dyn )
        {
            double time[2];
            int selected_n_sample;

            int rep_test, acc_rep_test, t_sample;

            selected_n_sample=ITERATIONS->n_sample;

            if( iter==0 || BMODE->type == Sync) 
            {
                ITERATIONS->n_sample_prev=ITERATIONS->msgspersample;

                if( c_info->n_lens> 0)
                {
                    int i;
                    for(i=0; i<c_info->n_lens; i++) ITERATIONS->numiters[i]=0;
                }
            }

            rep_test=1;

            ITERATIONS->n_sample=rep_test;

            time[0]=time[1]=0;

            /* first, run 1 iteration only */
#ifdef MPI1
            c_info->select_source = Bmark->select_source;
#endif
            Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);

            time[1] = time[0];

#ifdef MPIIO
            if( Bmark->access != no)
            {
                ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
                MPI_ERRHAND(ierr);

                if( Bmark->fpointer == shared)
                {
                    ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
                    MPI_ERRHAND(ierr);
                }
            }
#endif /*MPIIO*/

            /* Use the slowest rank's time as the reference. */
            MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);

            /* determine rough #repetitions for a run time of 1 sec */

            if( time[0] < 0.001 )
            {
                rep_test=1000;
            }
            else if( time[0]<1. )
            {
                rep_test = (int) (1./time[0]+.5);
            }

            MPI_Allreduce(&rep_test, &acc_rep_test, 1, MPI_INT, MPI_MAX, c_info->communicator);

            ITERATIONS->n_sample=min(selected_n_sample,acc_rep_test);

            /* Second, longer test run to get a more reliable time. */
            if( ITERATIONS->n_sample>1 ) 
            {
#ifdef MPI1
                c_info->select_source = Bmark->select_source;
#endif
                Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
                time[1] = time[0];

#ifdef MPIIO
                if( Bmark->access != no)
                {
                    ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
                    MPI_ERRHAND(ierr);

                    if( Bmark->fpointer == shared)
                    {
                        ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
                        MPI_ERRHAND(ierr);
                    }
                }
#endif /*MPIIO*/

                MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);
            }

            if( time[0] > 1.e-8 )
            {
                /* (float)0x7fffffff rounds up to 2^31, which does not fit into
                   an int; the strict comparison keeps the conversion in range. */
                float val = (float) (1+ITERATIONS->secs/time[0]);
                t_sample = (val< (float) 0x7fffffff) ? (int) val :  selected_n_sample;
            }
            else
            {
                t_sample = selected_n_sample;
            }

            if( c_info->n_lens>0 && BMODE->type != Sync)
            {
                // check monotonicity with msg sizes 
                int it;
                for(it=0; it<iter; it++)
                {
                    if( c_info->msglen[it] < size ) t_sample = min(t_sample,ITERATIONS->numiters[it]);
                    else                            t_sample = max(t_sample,ITERATIONS->numiters[it]);
                }

                ITERATIONS->n_sample = ITERATIONS->numiters[iter] = min( selected_n_sample,t_sample );
            }
            else
            {
                ITERATIONS->n_sample = min( selected_n_sample, min( ITERATIONS->n_sample_prev, t_sample ) );
            }

            /* All ranks adopt the count chosen by rank 0 of the communicator. */
            MPI_Bcast(&ITERATIONS->n_sample, 1, MPI_INT, 0, c_info->communicator);

#ifdef DEBUG
            {
                /* time is an array; report the reduced time of the last test run. */
                int usec=(int)(time[0]*1000000);

                DBGF_I2("Checked time with #iters / usec ",acc_rep_test,usec);
                DBGF_I1("=> # samples, aligned with previous ",t_sample);
                DBGF_I1("final #samples ",ITERATIONS->n_sample);
            }
#endif
        } /*if( ITERATIONS->iter_dyn )*/

        ITERATIONS->n_sample_prev=ITERATIONS->n_sample;

    } /*if (!( s_alloc + r_alloc > c_info->max_mem*MEM_UNIT ))*/


/* >> IMB 3.1  */

}