Пример #1
0
/**
 * Gives control back to the scheduler after main() exits, so that any
 * remaining threads can continue running.
 *
 * Does nothing when called from a forked child, i.e. when getpid() differs
 * from capriccio_main_pid.
 *
 * FIXME: we cannot tell whether the user explicitly called exit() or main()
 * returned normally.  In the former case we should exit immediately; in the
 * latter we should join the other threads.  Overriding exit() does not help,
 * because a normal return from main() also calls exit().
 **/
static void exit_func(void)
{
  // a forked child process must not run any of the shutdown steps
  if( getpid() != capriccio_main_pid ) {
    return;
  }

  exit_func_done = 1;
  main_exited = 1;

  // unless the whole program is exiting, block here until all other
  // threads have finished
  if( !exit_whole_program ) {
    thread_exit(NULL);
  }

  // dump the blocking graph before we exit
  if( conf_dump_blocking_graph ) {
    tdebug("dumping blocking graph from exit_func()\n");
    dump_blocking_graph();
  }

  // FIXME: make sure to kill cloned children

  // nothing more to do unless timing output was requested
  if( !conf_dump_timing_info ) {
    return;
  }

  // stop whichever timers are still running, then report them
  if( main_timer.running ) {
    stop_timer(&main_timer);
  }
  if( scheduler_timer.running ) {
    stop_timer(&scheduler_timer);
  }
  if( app_timer.running ) {
    stop_timer(&app_timer);
  }
  print_timers();
}
Пример #2
0
/**
 * Thread body that drops clients whose timer has run out.
 *
 * Roughly once per second (sleep(1) between passes) it walks slots
 * 0..connected_count-1.  A client whose get_timer_value() exceeds 10000.0f
 * is removed by shifting the tails of cl_names, cl_addr and timers_start
 * down by one slot and decrementing connected_count; the shift is done
 * between sem_wait(&semaphore) and sem_post(&semaphore).
 *
 * The loop never terminates, so the function does not return.
 */
void* thread_disconnector()
{
	if (DEBUG == 1)
		printf("DEBUG thread_disconnector: begin\n");
	while (1)
	{
		int i=0;
		/* i advances only when client i is kept: a removal moves the
		 * following client into slot i, and that one must be checked too */
		while (i<connected_count)
		{
			if (DEBUG==1)
				printf("DEBUG thread_disconnector: time: %f\n",get_timer_value(i));
			if (!(get_timer_value(i) > 10000.0f))
			{
				++i;
				continue;
			}

			if (DEBUG == 1)
			{
				printf("DEBUG thread_disconnector: client %d doesnt response\n",i);
				print_timers();
			}

			sem_wait(&semaphore);
			/* re-check under the semaphore: connected_count may have changed
			 * since the loop test, and the tail length must not go negative */
			if (i < connected_count)
			{
				size_t tail = (size_t)(connected_count-i-1);

				/* source and destination overlap, so memmove (not memcpy) */
				/* shift cl_names */
				memmove(cl_names+i, cl_names+i+1, sizeof(cl_names[i])*tail);
				/* shift cl_addr */
				memmove(cl_addr+i, cl_addr+i+1, sizeof(cl_addr[i])*tail);
				/* shift the timers */
				memmove(timers_start+i, timers_start+i+1, sizeof(timers_start[i])*tail);
				--connected_count;
			}
			sem_post(&semaphore);

			if (DEBUG == 1)
			{
				printf("DEBUG thread_disconnector: client %d shifting finished\n",i);
				print_timers();
			}
		}
		sleep(1);
	}
}
Пример #3
0
/**
 * Looks for `name` among the first connected_count entries of cl_names.
 *
 * On the first exact match (strcmp() == 0) the matching client's timer is
 * reset via reset_timer() and 1 is returned immediately.  If no entry
 * matches, 0 is returned.  print_timers() is called on every invocation,
 * regardless of DEBUG.
 */
char is_connected (char* name)
{
	int idx = 0;

	if (DEBUG == 1)
	{
		printf("DEBUG  is_connected: begin\n");
	}
	print_timers();

	while (idx < connected_count)
	{
		if (DEBUG == 1)
		{
			printf("DEBUG  is_connected: 1%s %s1\n",name, cl_names[idx]);
		}
		if (!strcmp(name, cl_names[idx]))
		{
			/* known client: restart its timer and report success */
			reset_timer(idx);
			return 1;
		}
		++idx;
	}

	if (DEBUG == 1)
	{
		printf("DEBUG  is_connected: end\n");
	}
	return 0;
}
Пример #4
0
/**
 * Benchmark driver.  Runs the whole problem once untimed as a warm-up,
 * clears the timers, runs it again under T_total and reports the outcome
 * through c_print_results() under the name "FT".
 *
 * At least one command-line argument is required; with none, a usage
 * message is written to stderr and the process exits with status -1.
 * argc/argv are forwarded unchanged to setup_opencl().
 *
 * Returns 0 after the results have been printed, release_opencl() has been
 * called and stdout has been flushed.
 */
int main(int argc, char *argv[])
{
  int i;
  int iter;
  double total_time, mflops;
  logical verified;
  char Class;

  // argc == 1 means only the program name was given
  if (argc == 1) {
    fprintf(stderr, "Usage: %s <kernel directory>\n", argv[0]);
    exit(-1);
  }

  //---------------------------------------------------------------------
  // Run the entire problem once to make sure all data is touched. 
  // This reduces variable startup costs, which is important for such a 
  // short benchmark. The other NPB 2 implementations are similar. 
  //---------------------------------------------------------------------
  // timer ids run from 1 to T_max inclusive
  for (i = 1; i <= T_max; i++) {
    timer_clear(i);
  }
  setup();
  setup_opencl(argc, argv);
  init_ui(&m_u0, &m_u1, &m_twiddle, dims[0], dims[1], dims[2]);
  compute_indexmap(&m_twiddle, dims[0], dims[1], dims[2]);
  compute_initial_conditions(&m_u1, dims[0], dims[1], dims[2]);
  fft_init(dims[0]);
  fft(1, &m_u1, &m_u0);

  //---------------------------------------------------------------------
  // Start over from the beginning. Note that all operations must
  // be timed, in contrast to other benchmarks. 
  //---------------------------------------------------------------------
  // discard everything the warm-up pass accumulated in the timers
  for (i = 1; i <= T_max; i++) {
    timer_clear(i);
  }

  // T_total is always measured; the finer-grained timers only when
  // timers_enabled is set
  timer_start(T_total);
  if (timers_enabled) timer_start(T_setup);

  DTIMER_START(T_compute_im);
  compute_indexmap(&m_twiddle, dims[0], dims[1], dims[2]);
  DTIMER_STOP(T_compute_im);

  DTIMER_START(T_compute_ics);
  compute_initial_conditions(&m_u1, dims[0], dims[1], dims[2]);
  DTIMER_STOP(T_compute_ics);

  DTIMER_START(T_fft_init);
  fft_init(dims[0]);
  DTIMER_STOP(T_fft_init);

  if (timers_enabled) timer_stop(T_setup);
  if (timers_enabled) timer_start(T_fft);
  fft(1, &m_u1, &m_u0);
  if (timers_enabled) timer_stop(T_fft);

  // niter iterations of evolve -> fft -> checksum, each phase on its own timer
  for (iter = 1; iter <= niter; iter++) {
    if (timers_enabled) timer_start(T_evolve);
    evolve(&m_u0, &m_u1, &m_twiddle, dims[0], dims[1], dims[2]);
    if (timers_enabled) timer_stop(T_evolve);
    if (timers_enabled) timer_start(T_fft);
    // both buffer arguments are m_u1 in this call
    fft(-1, &m_u1, &m_u1);
    if (timers_enabled) timer_stop(T_fft);
    if (timers_enabled) timer_start(T_checksum);
    checksum(iter, &m_u1, dims[0], dims[1], dims[2]);
    if (timers_enabled) timer_stop(T_checksum);
  }

  // verify() fills in `verified` and `Class`; it runs before T_total is
  // stopped, so its cost is part of total_time
  verify(NX, NY, NZ, niter, &verified, &Class);

  timer_stop(T_total);
  total_time = timer_read(T_total);

  // avoid dividing by a zero elapsed time; report 0 mflops in that case
  if (total_time != 0.0) {
    mflops = 1.0e-6 * (double)NTOTAL *
            (14.8157 + 7.19641 * log((double)NTOTAL)
            + (5.23518 + 7.21113 * log((double)NTOTAL)) * niter)
            / total_time;
  } else {
    mflops = 0.0;
  }
  c_print_results("FT", Class, NX, NY, NZ, niter,
                  total_time, mflops, "          floating point", verified, 
                  NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, CS7,
                  clu_GetDeviceTypeName(device_type),
                  device_name);
  if (timers_enabled) print_timers();

  release_opencl();

  fflush(stdout);

  return 0;
}
Пример #5
0
/**
 * Runs the FT benchmark with a caller-chosen OpenMP thread count.
 *
 * carg carries the thread count as an integer stored in a pointer value; it
 * is handed to omp_set_num_threads().  The whole problem is executed once
 * untimed so that all data is touched, the timers are cleared, and the
 * problem is run again under T_TOTAL.  The elapsed time is printed as
 * "Computetime <threads> <time>" followed by "client done".
 *
 * Returns 0 once the run has finished.
 */
static int realmain(void *carg)
{
    /* the thread count travels as an integer inside the pointer argument */
    unsigned arg = (unsigned)(uintptr_t)carg;

    int i;

/*------------------------------------------------------------------
c u0, u1, u2 are the main arrays in the problem.
c Depending on the decomposition, these arrays will have different
c dimensions. To accommodate all possibilities, we allocate them as
c one-dimensional arrays and pass them to subroutines for different
c views
c  - u0 contains the initial (transformed) initial condition
c  - u1 and u2 are working arrays
c  - indexmap maps i,j,k of u0 to the correct i^2+j^2+k^2 for the
c    time evolution operator.
c-----------------------------------------------------------------*/

/*--------------------------------------------------------------------
c The large arrays are declared static so that they are not placed on
c the stack. They are not referenced directly anywhere else. Padding
c is to avoid accidental cache problems, since all array sizes are
c powers of two.
c-------------------------------------------------------------------*/
    static dcomplex u0[NZ][NY][NX];
    static dcomplex pad1[3];
    static dcomplex u1[NZ][NY][NX];
    static dcomplex pad2[3];
    static dcomplex u2[NZ][NY][NX];
    static dcomplex pad3[3];
    static int indexmap[NZ][NY][NX];

    int iter;
    int nthreads = 1;
    double total_time, mflops;
    boolean verified;
    char class;

    omp_set_num_threads((int)arg);
/*--------------------------------------------------------------------
c Run the entire problem once to make sure all data is touched.
c This reduces variable startup costs, which is important for such a
c short benchmark. The other NPB 2 implementations are similar.
c-------------------------------------------------------------------*/
    for (i = 0; i < T_MAX; i++) {
	timer_clear(i);
    }
    setup();
#pragma omp parallel
 {
    compute_indexmap(indexmap, dims[2]);
#pragma omp single
   {
    compute_initial_conditions(u1, dims[0]);
    fft_init (dims[0][0]);
   }
    fft(1, u1, u0);
 } /* end parallel */

/*--------------------------------------------------------------------
c Start over from the beginning. Note that all operations must
c be timed, in contrast to other benchmarks.
c-------------------------------------------------------------------*/
    for (i = 0; i < T_MAX; i++) {
	timer_clear(i);
    }

    /* T_TOTAL is always measured; the per-phase timers only when
       TIMERS_ENABLED is TRUE, and then only on the master thread */
    timer_start(T_TOTAL);
    if (TIMERS_ENABLED == TRUE) timer_start(T_SETUP);

#pragma omp parallel private(iter) firstprivate(niter)
  {
    compute_indexmap(indexmap, dims[2]);

#pragma omp single
   {
    compute_initial_conditions(u1, dims[0]);

    fft_init (dims[0][0]);
   }

    if (TIMERS_ENABLED == TRUE) {
#pragma omp master
      timer_stop(T_SETUP);
    }
    if (TIMERS_ENABLED == TRUE) {
#pragma omp master
      timer_start(T_FFT);
    }
    fft(1, u1, u0);
    if (TIMERS_ENABLED == TRUE) {
#pragma omp master
      timer_stop(T_FFT);
    }

    /* niter iterations of evolve -> fft -> checksum */
    for (iter = 1; iter <= niter; iter++) {
	if (TIMERS_ENABLED == TRUE) {
#pragma omp master
	  timer_start(T_EVOLVE);
	}

	evolve(u0, u1, iter, indexmap, dims[0]);

	if (TIMERS_ENABLED == TRUE) {
#pragma omp master
	  timer_stop(T_EVOLVE);
	}
	if (TIMERS_ENABLED == TRUE) {
#pragma omp master
	  timer_start(T_FFT);
	}

	fft(-1, u1, u2);

	if (TIMERS_ENABLED == TRUE) {
#pragma omp master
	  timer_stop(T_FFT);
	}
	if (TIMERS_ENABLED == TRUE) {
#pragma omp master
	  timer_start(T_CHECKSUM);
	}

	checksum(iter, u2, dims[0]);

	if (TIMERS_ENABLED == TRUE) {
#pragma omp master
	  timer_stop(T_CHECKSUM);
	}
    }

    /* a single thread fills in `verified` and `class` */
#pragma omp single
    verify(NX, NY, NZ, niter, &verified, &class);

#if defined(_OPENMP)
#pragma omp master
    nthreads = omp_get_num_threads();
#endif /* _OPENMP */
  } /* end parallel */

    timer_stop(T_TOTAL);
    total_time = timer_read(T_TOTAL);

    /* avoid dividing by a zero elapsed time */
    if (total_time != 0.0) {
	mflops = 1.0e-6*(double)(NTOTAL) *
	    (14.8157+7.19641*log((double)(NTOTAL))
	     +  (5.23518+7.21113*log((double)(NTOTAL)))*niter)
	    /total_time;
    } else {
	mflops = 0.0;
    }
#ifdef BOMP
    backend_create_time(arg);
#endif
    /* arg is unsigned, so it is printed with %u */
    printf("Computetime %u %f\n", arg, total_time);
    printf("client done\n");
    /* The full NPB report is disabled; nthreads, mflops, verified and class
       are only consumed by this call. */
/*     c_print_results("FT", class, NX, NY, NZ, niter, nthreads, */
/* 		    total_time, mflops, "          floating point", verified,  */
/* 		    NPBVERSION, COMPILETIME, */
/* 		    CS1, CS2, CS3, CS4, CS5, CS6, CS7); */
    if (TIMERS_ENABLED == TRUE) print_timers();

    /* the function is declared int: give callers a defined result */
    return 0;
}