Beispiel #1
1
/* start simulation, program loaded, processor precise state initialized */
/*
 * Functional simulation main loop.
 *
 * Every iteration fetches the instruction at regs.regs_PC, decodes its
 * opcode, executes it through the per-opcode cases that the DEFINST macro
 * generates from machine.def, and then updates the profiling counters that
 * follow the switch (register bit-change totals, conditional-branch offset
 * histogram, and per-class instruction counts).
 *
 * The only return statement is the max_insts check at the bottom of the
 * loop; panic()/fatal() are called on bogus opcodes and faults.
 */
void
sim_main(void)
{
  md_inst_t inst;
  register md_addr_t addr;
  enum md_opcode op;
  register int is_write;
  enum md_fault_type fault;

  // conditional-branch offset bits
  int offbits = 0;
  int twoscompl = 0;	// not referenced by any statement visible in this function
  unsigned int abs_offbits = 0;
  
  // number of bits changed in a GPR
  int bits_changed;
  
  int dst_reg; // destination register in part 3

  fprintf(stderr, "sim: ** starting functional simulation **\n");

  /* set up initial default next PC */
  regs.regs_NPC = regs.regs_PC + sizeof(md_inst_t);

	// initialize all the arrays
	init_arrays();
	
		
  while (TRUE)
    {
      /* maintain $r0 semantics */
      regs.regs_R[MD_REG_ZERO] = 0;
#ifdef TARGET_ALPHA
      regs.regs_F.d[MD_REG_ZERO] = 0.0;
#endif /* TARGET_ALPHA */

      /* get the next instruction to execute */
      MD_FETCH_INST(inst, mem, regs.regs_PC);

      /* keep an instruction count */
      sim_num_insn++;

      /* set default reference address and access mode */
      addr = 0; is_write = FALSE;

      /* set default fault - none */
      fault = md_fault_none;

      /* decode the instruction */
      MD_SET_OPCODE(op, inst);

      /* execute the instruction */
      switch (op)
	{
	/* Each DEFINST case records the first output operand (O1) in dst_reg
	   and, when it names a register in 1..NUM_REGS-1, snapshots that
	   register into prev_val[] before the instruction body runs. */
#define DEFINST(OP,MSK,NAME,OPFORM,RES,FLAGS,O1,O2,I1,I2,I3)		\
	case OP:							\
		  dst_reg = O1;					\
		  if(dst_reg > 0 && dst_reg < NUM_REGS) prev_val[dst_reg] = regs.regs_R[dst_reg]; \
          SYMCAT(OP,_IMPL);						\
          break;
#define DEFLINK(OP,MSK,NAME,MASK,SHIFT)					\
        case OP:							\
          panic("attempted to execute a linking opcode");
#define CONNECT(OP)
#define DECLARE_FAULT(FAULT)						\
	  { fault = (FAULT); break; }
#include "machine.def"
	default:
	  panic("attempted to execute a bogus opcode");
      }

      if (fault != md_fault_none)
	fatal("fault (%d) detected @ 0x%08p", fault, regs.regs_PC);

      if (verbose)
	{
	  myfprintf(stderr, "%10n [xor: 0x%08x] @ 0x%08p: ",
		    sim_num_insn, md_xor_regs(&regs), regs.regs_PC);
	  md_print_insn(inst, regs.regs_PC, stderr);
	  if (MD_OP_FLAGS(op) & F_MEM)
	    myfprintf(stderr, "  mem: 0x%08p", addr);
	  fprintf(stderr, "\n");
	  /* fflush(stderr); */
	}
	
	
	// for registers 0-31 determine the new value written to dst_reg
	// (the guard below actually excludes index 0 and anything >= NUM_REGS)
	if(dst_reg > 0 && dst_reg < NUM_REGS){
		
		bits_changed = func_bits_chng(prev_val[dst_reg], regs.regs_R[dst_reg]);
		reg_contents[dst_reg] += bits_changed;
		num_inst_chng_bits++;
		
	}
	
	
	  if(MD_OP_FLAGS(op) & F_COND)
	{
		g_total_cond_branches++;
		
		// increment histogram index with corresp. offset bits
		// NOTE(review): if md_addr_t is unsigned, a backward branch makes
		// (TPC - PC) wrap before the division, so offbits may never be
		// negative here -- confirm against the md_addr_t definition.
		offbits = (regs.regs_TPC - regs.regs_PC)/8;
		
		// absolute value representation of the offbits
		abs_offbits = abs(regs.regs_TPC - regs.regs_PC)/8;
				
		// NOTE(review): when abs_offbits is 0, log10(0) is -infinity and the
		// conversion of the result to int is not well defined; confirm that
		// a zero offset cannot reach this point or guard the histogram index.
		if(offbits < 0){
			// if offset is negative, simply add one bit (the signed bit) to its absolute value
			// this is equivalent to offbits = ceil(log10(-1*offbits)/log10(2) + 1); (trust me, I've tried it)
			offbits = floor(log10(abs_offbits)/log10(2) + 2) + 1; 
		}else{
			offbits = floor(log10(abs_offbits)/log10(2) + 2);
		}
		
		
		// offbits is used unchecked as an index; histogram's size is not visible here
		histogram[offbits]++;

	}
	
	  if(MD_OP_FLAGS(op) & F_DIRJMP)
	{
		g_total_uncond_branches++;
		
	}
	
	  if((MD_OP_FLAGS(op) & F_FCOMP) || (MD_OP_FLAGS(op) & F_FPCOND))
	{
		g_total_fp_inst++;
	}
	
	  if(MD_OP_FLAGS(op) & F_STORE)
	{
		g_total_store_inst++;
	}
	
	  if(MD_OP_FLAGS(op) & F_LOAD)
	{
		g_total_ld_inst++;
	}
	
	  if(MD_OP_FLAGS(op) & F_IMM)
	{
		g_total_imm_inst++;
	}
	
      if (MD_OP_FLAGS(op) & F_MEM)
	{
	  sim_num_refs++;
	  /* is_write is set here but not read by any statement visible in this
	     function (the machine.def expansions are not shown) */
	  if (MD_OP_FLAGS(op) & F_STORE)
	    is_write = TRUE;
	}

	

      /* go to the next instruction */
      regs.regs_PC = regs.regs_NPC;
      regs.regs_NPC += sizeof(md_inst_t);

      /* finish early? */
      if (max_insts && sim_num_insn >= max_insts){
			
			return;
		}
    }
}
/* Driver: calls init_arrays() once, then runs main1() on two argument triples. */
int main (void)
{
  /* Argument triples passed to main1(), in call order. */
  static const int runs[2][3] = { { 2000, 2000, 1 }, { 0, 1599, 0 } };
  int r;

  init_arrays ();
  for (r = 0; r < 2; r++)
    main1 (runs[r][0], runs[r][1], runs[r][2]);

  return 0;
}
Beispiel #3
0
/*
 * Timing harness: runs the five-deep contraction
 *   R[v1][v2][o1][o2] += T[v1][ox][o1][o2] * A2[v2][ox]
 * REPS times, timing each repetition with rtclock(), and prints the mean
 * time per repetition.  Returns 1, as the original harness does.
 */
int main()
{
  double annot_t_start = 0, annot_t_end = 0, annot_t_total = 0;
  int annot_i;
  int v1, v2, o1, o2, ox;

  init_arrays();

  for (annot_i = 0; annot_i < REPS; ++annot_i) {
    annot_t_start = rtclock();

    for (v1 = 0; v1 <= V-1; ++v1) {
      for (v2 = 0; v2 <= V-1; ++v2) {
        for (o1 = 0; o1 <= O-1; ++o1) {
          for (o2 = 0; o2 <= O-1; ++o2) {
            for (ox = 0; ox <= O-1; ++ox) {
              R[v1][v2][o1][o2] += T[v1][ox][o1][o2] * A2[v2][ox];
            }
          }
        }
      }
    }

    annot_t_end = rtclock();
    annot_t_total += annot_t_end - annot_t_start;
  }

  /* Report the average over all repetitions. */
  annot_t_total /= REPS;
  printf("%f\n", annot_t_total);

  return 1;
}
Beispiel #4
0
/* Driver: calls init_arrays() once, then runs main1() on two argument triples. */
int main (void)
{
  /* One row per main1() call: the three arguments in order. */
  static const int calls[][3] = {
    { 100, 100, 1 },
    {   0,  15, 0 },
  };
  unsigned idx;

  init_arrays();
  for (idx = 0; idx < sizeof calls / sizeof calls[0]; ++idx)
    main1 (calls[idx][0], calls[idx][1], calls[idx][2]);

  return 0;
}
Beispiel #5
0
// Host driver for the two-kernel "mvt" run: resizes the problem through
// getNewSizes(), allocates and initialises the host buffers, builds the
// program, launches the kernels, reads both result vectors back, hands them to
// runMvt() together with the host inputs, and releases everything.
int main(void) {
  // getNewSizes() may replace the problem dimension; N is updated before any
  // allocation below depends on it.
  size_t oldSizes[1] = { N };
  size_t newSizes[1];
  getNewSizes(oldSizes, NULL, newSizes, NULL, "mvt_kernel1", 1);
  N = newSizes[0];

  // One N x N matrix and six length-N vectors.
  DATA_TYPE *a = (DATA_TYPE *)malloc(N * N * sizeof(DATA_TYPE));
  DATA_TYPE *x1 = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));
  DATA_TYPE *x2 = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));
  DATA_TYPE *x1_outputFromGpu = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));
  DATA_TYPE *x2_outputFromGpu = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));
  DATA_TYPE *y_1 = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));
  DATA_TYPE *y_2 = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));

  init_arrays(a, x1, x2, y_1, y_2);

  // Platform / context / device / profiling-enabled queue.
  platform = new Platform(PLATFORM_ID);
  context = platform->getContext();
  Device device = platform->getDevice(DEVICE_ID);
  Queue queue(*context, device, Queue::EnableProfiling);

  cl_mem_init(a, x1, x2, y_1, y_2, queue);

  Program program(context, KERNEL_DIRECTORY KERNEL_FILE_NAME);
  const bool built = program.build(device);
  if (!built) {
    // A failed build is only reported; execution continues as in the original.
    std::cout << "Error building the program: \n";
    std::cout << program.getBuildLog(device);
  }
  kernel1 = program.createKernel(kernel1Name.c_str());
  kernel2 = program.createKernel(kernel2Name.c_str());
  cl_launch_kernel(queue);

  // Fetch both result vectors, then wait for the queue to drain.
  queue.readBuffer(*x1_mem_obj, N * sizeof(DATA_TYPE), x1_outputFromGpu);
  queue.readBuffer(*x2_mem_obj, N * sizeof(DATA_TYPE), x2_outputFromGpu);
  queue.finish();

  runMvt(a, x1, x2, y_1, y_2, x1_outputFromGpu, x2_outputFromGpu);
  cl_clean_up();

  free(a);
  free(x1);
  free(x2);
  free(x1_outputFromGpu);
  free(x2_outputFromGpu);
  free(y_1);
  free(y_2);

  return 0;
}
Beispiel #6
0
int main() {
  double t_start, t_end;

  DATA_TYPE* A;
  DATA_TYPE* C;
  DATA_TYPE* D;

  A = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
  C = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
  D = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));

  fprintf(stdout, "<< Symmetric rank-k operations >>\n");

  init_arrays(A, C, D);	
  syrkGPU(A, D);

  t_start = rtclock();
  syrk(A, C);
  t_end = rtclock();
  fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

  compareResults(C, D);

  free(A);
  free(C);
  free(D);
  return 0;
}
/* Driver: calls init_arrays() once, then main1() for N and for N-1. */
int main (void)
{
  init_arrays ();

  /* Each call pairs a size with the second argument used for that size. */
  main1 (N, 2558400);
  main1 (N-1, 2555202);

  return 0;
}
Beispiel #8
0
/*
 * SYRK driver using HMPP directives.
 *
 * Declares the input matrix A and two result matrices as local arrays,
 * initialises them, runs runSyrk() once under the "syrk" HMPP codelet
 * directives (result in C_outputFromGpu) and once as a plain call (result in
 * C), prints the elapsed time of each run to stderr, and compares the two
 * results.  argc and argv are not used.
 */
int main(int argc, char** argv)
{
	double t_start, t_end;
	
	/* Array declaration */
	/* NOTE(review): these are local arrays; for large N and M they may exceed
	   the available stack -- confirm the intended problem sizes. */
	DATA_TYPE A[N][M];
	DATA_TYPE C[N][N];
	DATA_TYPE C_outputFromGpu[N][N];

	/* Initialize array. */
	init_arrays(A, C, C_outputFromGpu);

	#pragma hmpp syrk allocate
	#pragma hmpp syrk advancedload, args[a,c]

	/* First timed region: the call issued under the asynchronous callsite
	   directive, ended by the synchronize directive. */
	t_start = rtclock();
	#pragma hmpp syrk callsite, args[a,c].advancedload=true, asynchronous
	runSyrk(A, C_outputFromGpu);
	#pragma hmpp syrk synchronize

	t_end = rtclock();
	fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
	
	#pragma hmpp syrk delegatedstore, args[c]
	#pragma hmpp syrk release

	/* Second timed region: the same routine called without directives. */
	t_start = rtclock();
	runSyrk(A, C);
	t_end = rtclock();
	fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

	compareResults(C, C_outputFromGpu);

	return 0;
}
Beispiel #9
0
/*
 * Try one candidate hash function built from the characters at first_index
 * and last_index.  Returns true when hash_ok() accepts the result; otherwise
 * returns false, and (unless quick_failure() rejected the candidate up
 * front) reports the keyword with the highest collision count.
 */
bool hash_func( unsigned first_index, unsigned last_index )
{
    keyword_t cur;
    keyword_t worst;

    output( "\ntrying hash function ( len+id[%u]*%u+id[(len-1)-%u]*%u ) mod %u\n",
                                first_index, first_scale,
                                last_index, last_scale,
                                hashsize );
    init_arrays( first_index, last_index );
    sort_frequency();

    if( !quick_failure() ) {
        try_for_hash();
        if( hash_ok() ) {
            return( true );
        }

        /* Scan every keyword; on ties the later keyword wins (>=). */
        worst = 1;
        cur = 1;
        while( cur <= num_keywords ) {
            if( collisions[cur] >= collisions[worst] ) {
                worst = cur;
            }
            ++cur;
        }
        output( "keyword '%s' had the most collisions\n", tokens[worst] );
    }
    return( false );
}
Beispiel #10
0
/*
 * Covariance driver using HMPP directives.
 *
 * Runs the three stages covarLoopa / covarLoopb / covarLoopc twice: once
 * under the <group1> HMPP directives on the *_Gpu copies, and once as plain
 * calls on the host copies.  Each run's elapsed time is printed to stderr and
 * the two symmat results are compared.  argc, argv, m and n are not used by
 * any statement visible in this function.
 */
int main(int argc, char** argv)
{
	int m = M;
	int n = N;
	double t_start, t_end;

	/* Array declaration */
	DATA_TYPE float_n = 321414134.01;
	DATA_TYPE data[M + 1][N + 1];
	DATA_TYPE data_Gpu[M + 1][N + 1];
	DATA_TYPE symmat[M + 1][M + 1];
	DATA_TYPE symmat_outputFromGpu[M + 1][M + 1];	
	DATA_TYPE mean[M + 1];
	DATA_TYPE mean_Gpu[M + 1];

	/* Initialize array. */
	/* Only data and data_Gpu are passed to init_arrays(); the other arrays
	   are not initialised in this function. */
	init_arrays(data, data_Gpu);
    
	#pragma hmpp <group1> allocate
	#pragma hmpp <group1> loopa advancedload, args[pmean;pdata;pfloat_n]
    
	#pragma hmpp <group1> loopc advancedload, args[psymmat]

	/* First timed region: the three stages under callsite directives, each
	   followed by its synchronize directive. */
	t_start = rtclock();
	
	#pragma hmpp <group1> loopa callsite, args[pmean;pdata;pfloat_n].advancedload=true, asynchronous
	covarLoopa(mean_Gpu, data_Gpu, float_n);
	#pragma hmpp <group1> loopa synchronize
	#pragma hmpp <group1> loopb callsite, args[pdata;pmean].advancedload=true, asynchronous
	covarLoopb(data_Gpu, mean_Gpu);
	#pragma hmpp <group1> loopb synchronize
	#pragma hmpp <group1> loopc callsite, args[psymmat;pdata].advancedload=true, asynchronous
	covarLoopc(symmat_outputFromGpu, data_Gpu);
	#pragma hmpp <group1> loopc synchronize

	t_end = rtclock();
	fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
    
    
	#pragma hmpp <group1> loopb delegatedstore, args[pmean]
    
	#pragma hmpp <group1> loopc delegatedstore, args[psymmat;pdata]
	#pragma hmpp <group1> release
	
	/* Second timed region: the same three stages as plain calls. */
	t_start = rtclock();
	
	covarLoopa(mean, data, float_n);
	covarLoopb(data, mean);
	covarLoopc(symmat, data);
	
	t_end = rtclock();
	fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);
	
	compareResults(symmat, symmat_outputFromGpu);

	return 0;
}
Beispiel #11
0
/*
 * SYR2K OpenCL driver.
 *
 * Declares A, B (NI x NJ) and C, C_outputFromGpu (NI x NI) through the
 * POLYBENCH_2D_ARRAY_DECL macro, initialises the inputs, runs the OpenCL
 * setup/launch helpers and reads the device result into C_outputFromGpu.
 * With RUN_ON_CPU defined it additionally runs and times syr2kCpu() and
 * compares the results; otherwise it passes the device result through
 * polybench_prevent_dce().  argc and argv are not used.
 */
int main(int argc, char *argv[])
{
	/* Retrieve problem size. */
	int ni = NI;
	int nj = NJ;

	/* Variable declaration/allocation. */
	DATA_TYPE alpha;
	DATA_TYPE beta;
	POLYBENCH_2D_ARRAY_DECL(A,DATA_TYPE,NI,NJ,ni,nj);
	POLYBENCH_2D_ARRAY_DECL(B,DATA_TYPE,NI,NJ,ni,nj);
	POLYBENCH_2D_ARRAY_DECL(C,DATA_TYPE,NI,NI,ni,ni);
	POLYBENCH_2D_ARRAY_DECL(C_outputFromGpu,DATA_TYPE,NI,NI,ni,ni);

	init_arrays(ni, nj, &alpha, &beta, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C));
	read_cl_file();
	cl_initialization();
	cl_mem_init(POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C));
	cl_load_prog();

	cl_launch_kernel(ni, nj, alpha, beta);

	/* NOTE(review): the read size is NI*NJ elements while C_outputFromGpu is
	   declared NI x NI above; the two agree only when NI == NJ -- confirm
	   against the size c_mem_obj is created with. */
	errcode = clEnqueueReadBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, NI*NJ*sizeof(DATA_TYPE), POLYBENCH_ARRAY(C_outputFromGpu), 0, NULL, NULL);
	/* A failed read is only reported; execution continues. */
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");


	#ifdef RUN_ON_CPU

		/* Start timer. */
	  	polybench_start_instruments;

		syr2kCpu(ni, nj, alpha, beta, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C));
	
		/* Stop and print timer. */
		printf("CPU Time in seconds:\n");
	  	polybench_stop_instruments;
	 	polybench_print_instruments;

		compareResults(ni, POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(C_outputFromGpu));
	
	#else //prevent dead code elimination

		polybench_prevent_dce(print_array(ni, POLYBENCH_ARRAY(C_outputFromGpu)));

	#endif //RUN_ON_CPU


	cl_clean_up();

	POLYBENCH_FREE_ARRAY(A);
	POLYBENCH_FREE_ARRAY(B);
	POLYBENCH_FREE_ARRAY(C);
	POLYBENCH_FREE_ARRAY(C_outputFromGpu);

	return 0;
}
Beispiel #12
0
/*
 * Reads the problem input from stdin: first an integer into len, which sizes
 * the arrays via init_arrays(len + 1), then one line each into X and Y
 * through getline().  Return values of scanf/getline are not inspected.
 */
void user_input() {
    size_t capacity;

    scanf("%d\n", &len);
    init_arrays(len + 1);

    /* Null line pointers are passed so that getline() supplies the buffers. */
    Y = NULL;
    X = NULL;

    getline(&X, &capacity, stdin);
    getline(&Y, &capacity, stdin);
}
Beispiel #13
0
int main(int argc, char* argv[]) 
//int main(void) 
{
	double t_start, t_end;
	
	DATA_TYPE* data;
	DATA_TYPE* mean;
	DATA_TYPE* stddev;
	DATA_TYPE* symmat;
	DATA_TYPE* symmat_outputFromGpu;
        if(argc==2){
          printf("arg 1 = %s\narg 2 = %s\n", argv[0], argv[1]);
          cpu_offset = atoi(argv[1]);
        }


	data = (DATA_TYPE*)malloc((M + 1)*(N + 1)*sizeof(DATA_TYPE));
	mean = (DATA_TYPE*)malloc((M + 1)*sizeof(DATA_TYPE));
	stddev = (DATA_TYPE*)malloc((M + 1)*sizeof(DATA_TYPE));
	symmat = (DATA_TYPE*)malloc((M + 1)*(N + 1)*sizeof(DATA_TYPE));
	symmat_outputFromGpu = (DATA_TYPE*)malloc((M + 1)*(N + 1)*sizeof(DATA_TYPE));
	
	init_arrays(data);
	read_cl_file();
	cl_initialization_fusion();
	//cl_initialization();
	cl_mem_init(data, mean, stddev, symmat);
	cl_load_prog();

	double start = rtclock();
	cl_launch_kernel();
	double end = rtclock(); 
	fprintf(stdout, "CAUTION:CPU offset %d %% GPU Runtime: %0.6lf s\n",cpu_offset, (end - start));
	//fprintf(stdout, "CAUTION:CPU offset %d %% GPU Runtime: %0.6lf s\n",cpu_offset, 1000*(end - start));

	errcode = clEnqueueReadBuffer(clCommandQue[0], symmat_mem_obj, CL_TRUE, 0, (M+1) * (N+1) * sizeof(DATA_TYPE), symmat_outputFromGpu, 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

	t_start = rtclock();
	correlation(data, mean, stddev, symmat);
	t_end = rtclock(); 
	fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);   


	compareResults(symmat, symmat_outputFromGpu);
	cl_clean_up();
	
	free(data);
	free(mean);
	free(stddev);
	free(symmat);
	free(symmat_outputFromGpu);
	
    return 0;
}
Beispiel #14
0
/*
 * FDTD-2D OpenCL driver.
 *
 * Declares _fict_ (TMAX) and ex, ey, hz, hz_outputFromGpu (NX x NY) through
 * the POLYBENCH array macros, initialises the inputs, runs the OpenCL
 * setup/launch helpers and reads the device hz into hz_outputFromGpu.  With
 * RUN_ON_CPU defined it additionally runs and times runFdtd() and compares
 * the results; otherwise it passes the device result through
 * polybench_prevent_dce().  argc and argv are not used.
 */
int main(int argc, char *argv[])
{
	int tmax = TMAX;
	int nx = NX;
	int ny = NY;

	POLYBENCH_1D_ARRAY_DECL(_fict_,DATA_TYPE,TMAX,TMAX);
	POLYBENCH_2D_ARRAY_DECL(ex,DATA_TYPE,NX,NY,nx,ny);
	POLYBENCH_2D_ARRAY_DECL(ey,DATA_TYPE,NX,NY,nx,ny);
	POLYBENCH_2D_ARRAY_DECL(hz,DATA_TYPE,NX,NY,nx,ny);
	POLYBENCH_2D_ARRAY_DECL(hz_outputFromGpu,DATA_TYPE,NX,NY,nx,ny);
	
	init_arrays(tmax, nx, ny, POLYBENCH_ARRAY(_fict_), POLYBENCH_ARRAY(ex), POLYBENCH_ARRAY(ey), POLYBENCH_ARRAY(hz));

	read_cl_file();
	cl_initialization();
	cl_mem_init(POLYBENCH_ARRAY(_fict_), POLYBENCH_ARRAY(ex), POLYBENCH_ARRAY(ey), POLYBENCH_ARRAY(hz));
	cl_load_prog();

	cl_launch_kernel(tmax, nx, ny);

	/* Blocking read (CL_TRUE) of NX*NY elements; a failure is only reported. */
	errcode = clEnqueueReadBuffer(clCommandQue, hz_mem_obj, CL_TRUE, 0, NX * NY * sizeof(DATA_TYPE), POLYBENCH_ARRAY(hz_outputFromGpu), 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");	

	#ifdef RUN_ON_CPU

		/* Start timer. */
	  	polybench_start_instruments;

		runFdtd(tmax, nx, ny, POLYBENCH_ARRAY(_fict_), POLYBENCH_ARRAY(ex), POLYBENCH_ARRAY(ey), POLYBENCH_ARRAY(hz));
	
		/* Stop and print timer. */
		printf("CPU Time in seconds:\n");
	  	polybench_stop_instruments;
	 	polybench_print_instruments;

		compareResults(nx, ny, POLYBENCH_ARRAY(hz), POLYBENCH_ARRAY(hz_outputFromGpu));

	#else //prevent dead code elimination

		polybench_prevent_dce(print_array(nx, ny, POLYBENCH_ARRAY(hz_outputFromGpu)));

	#endif //RUN_ON_CPU

	/* Host arrays are released before the OpenCL objects are cleaned up. */
	POLYBENCH_FREE_ARRAY(_fict_);
	POLYBENCH_FREE_ARRAY(ex);
	POLYBENCH_FREE_ARRAY(ey);
	POLYBENCH_FREE_ARRAY(hz);
	POLYBENCH_FREE_ARRAY(hz_outputFromGpu);

	cl_clean_up();
	
    return 0;
}
Beispiel #15
0
/*
 * Symmetric rank-k driver: initialise, run syrkGPU(), then time syrk() with
 * rtclock(), print the elapsed time and compare the results.
 */
int main() {
  double cpu_begin;
  double cpu_end;

  init_arrays();
  syrkGPU();

  /* Only the syrk() call sits between the two clock samples. */
  cpu_begin = rtclock();
  syrk();
  cpu_end = rtclock();

  fprintf(stdout, "CPU Runtime: %0.6lfs\n", cpu_end - cpu_begin);
  compareResults();
  return 0;
}
/// Construct a model bound to @p cache.
///
/// @param cache  cache object the model keeps a reference or copy of
/// @param vert   passed to init_program() as the first argument
/// @param frag   passed to init_program() as the second argument
/// @param n      passed to init_arrays()
/// @param d      stored as depth; also sizes the status container via cube_size(d)
/// @param s      stored as size
sph_model::sph_model(sph_cache& cache,
                     const char *vert,
                     const char *frag, int n, int d, int s) :
    cache(cache), depth(d), size(s), time(1), status(cube_size(d), s_halt)
{
    init_program(vert, frag);
    init_arrays(n);

    // Initial zoom: vector (0, 0, -1) with factor 1.
    zoomv[0] = zoomv[1] = 0;
    zoomv[2] = -1;
    zoomk = 1;
}
Beispiel #17
0
/*
 * Correlation driver using HMPP directives.
 *
 * Runs runCorr() twice: once under the "corr" HMPP directives on the *_Gpu
 * copies, and once as a plain call on the host copies.  Each run's elapsed
 * time is printed to stderr and the two symmat results are compared.  argc,
 * argv, m and n are not used by any statement visible in this function.
 */
int main(int argc, char** argv)
{
	int m = M;
	int n = N;
	double t_start, t_end;

	/* Array declaration */
	DATA_TYPE float_n = 321414134.01;
	DATA_TYPE eps = 0.005;
	DATA_TYPE data[M + 1][N + 1];
	DATA_TYPE data_Gpu[M + 1][N + 1];
	DATA_TYPE mean[M + 1];
	DATA_TYPE mean_Gpu[M + 1];
	DATA_TYPE stddev[M + 1];
	DATA_TYPE stddev_Gpu[M + 1];
	DATA_TYPE symmat[M + 1][M + 1];
	DATA_TYPE symmat_outputFromGpu[M + 1][M + 1];

	/* Initialize array. */
	/* Only data and data_Gpu are passed to init_arrays(); the other arrays
	   are not initialised in this function. */
	init_arrays(data, data_Gpu);
	
	#pragma hmpp corr allocate
    
	#pragma hmpp corr advancedload, args[pdata;psymmat;pstddev;pmean;pfloat_n;peps]

	/* First timed region: the call under the asynchronous callsite
	   directive, ended by the synchronize directive. */
	t_start = rtclock();
	
	#pragma hmpp corr callsite, args[pdata;psymmat;pstddev;pmean;pfloat_n;peps].advancedload=true, asynchronous
	runCorr(data_Gpu, symmat_outputFromGpu, stddev_Gpu, mean_Gpu, float_n, eps);
    
	#pragma hmpp corr synchronize

	t_end = rtclock();
	fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
    
	#pragma hmpp corr delegatedstore, args[pdata;psymmat;pstddev;pmean]
	#pragma hmpp corr release
	
	/* Second timed region: the same routine as a plain call. */
	t_start = rtclock();
	
	runCorr(data, symmat, stddev, mean, float_n, eps);
	
	t_end = rtclock();
	fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);
	
	compareResults(symmat, symmat_outputFromGpu);

	return 0;
}
Beispiel #18
0
/*
 * Entry point: sets up the bit sequence and user state, initialises the board
 * at a 16 kHz sampling rate, then loops forever: wait for a buffer, read the
 * next bits and encode them with cod_rz().
 */
void main (void) {
	Uint32 fs = DSK6713_AIC23_FREQ_16KHZ;          /* sampling frequency */
	SEQUENCE seq = {input_bits, ninput_bits, 0};
	USER usr;

	init_arrays();
	init_user(&usr);                               /* initialise state variables */
	comm_intr(fs);                                 /* DSK initialisation */

	for (;;) {
		wait_buffer();
		read_bits(&seq, usr.nbits, usr.bits);
		/* Alternative encoders kept from the original for reference:
		 *   cod_polar (&usr, oblock);
		 *   cod_bipolar (&usr, oblock);
		 */
		cod_rz(&usr, oblock, (usr.lSymb)/2);
	}
}
Beispiel #19
0
/*
 * Timing harness for an in-place elimination on the global matrix A: for each
 * pivot row the entries right of the diagonal are divided by the pivot and the
 * trailing sub-matrix is updated.  The kernel runs REPS times; the mean time
 * is printed, or with TEST defined the matrix itself is dumped.
 */
int main()
{
  double annot_t_start = 0, annot_t_end = 0, annot_t_total = 0;
  int annot_i;

  init_arrays();

  for (annot_i = 0; annot_i < REPS; ++annot_i)
    {
      int row, col, pivot;

      annot_t_start = rtclock();

      for (pivot = 0; pivot <= N-1; ++pivot)
	{
	  /* Scale the pivot row to the right of the diagonal. */
	  for (col = pivot+1; col <= N-1; ++col)
	    A[pivot][col] /= A[pivot][pivot];

	  /* Update the trailing sub-matrix. */
	  for (row = pivot+1; row <= N-1; ++row)
	    for (col = pivot+1; col <= N-1; ++col)
	      A[row][col] -= A[row][pivot]*A[pivot][col];
	}

      annot_t_end = rtclock();
      annot_t_total += annot_t_end - annot_t_start;
    }

  annot_t_total /= REPS;

#ifndef TEST
  printf("%f\n", annot_t_total);
#else
  {
    int r, c;
    for (r = 0; r < N; ++r) {
      for (c = 0; c < N; ++c) {
        /* Line break before every 100th column. */
        if (c%100 == 0)
          printf("\n");
        printf("%f ",A[r][c]);
      }
      printf("\n");
    }
  }
#endif

  return ((int) A[0][0]);

}
Beispiel #20
0
/*
 * Per-thread entry point.
 *
 * rank_ptr must point to a uint32_t holding this thread's rank.  In DEBUG
 * builds the thread initialises its arrays, runs the selected
 * stream_per_thread kernel once between two barriers, and rank 0 prints the
 * arrays.  Otherwise the selected kernel is run through
 * TIMER_MAKE_MEASUREMENTS and rank 0 prints the timing and bandwidth tables.
 * The function ends with pthread_exit((void*) 0).
 */
void *pthreads_each(void *rank_ptr) {
  uint32_t rank;
#if !defined(DEBUG)
  uint32_t bytes_transferred;
#endif

#if defined(PAPI_ENABLED) && !defined(DEBUG)
  int num_sets;
  PAPI_event_set_wrapper_t* event_sets;

  papi_filter_events(desired_events, num_desired, &event_sets, &num_sets);
#endif

  rank = *((uint32_t *) rank_ptr);

#if defined(AFFINITY_ENABLED)
  Affinity_Bind_Thread(rank);
  Affinity_Bind_Memory(rank);
#endif

#if defined(DEBUG)
  init_arrays(rank);
  barrier_wait(&my_barrier, rank);
  /* The kernel is selected from a six-dimensional table indexed by the
     *_case configuration values. */
  stream_per_thread[read_arrays_case][write_arrays_case][write_type_case][load_type_case][loop_unroll_case][prefetch_distance_case](rank);
  barrier_wait(&my_barrier, rank);
  if (rank == 0) {
    print_arrays();
  }
#else
  TIMER_MAKE_MEASUREMENTS(stream_per_thread[read_arrays_case][write_arrays_case][write_type_case][load_type_case][loop_unroll_case][prefetch_distance_case](rank), results, rank, NUM_TRIALS);
  if (rank == 0) {
    printf("TIME (IN SECONDS)\n");
    print_max_timer_measurements(results, numThreads, NUM_TRIALS, median_counts_per_sec);
    printf("\nBANDWIDTH (IN GB/S, WHERE 1 GB/S = 10^9 B/S)\n");
    /* NOTE(review): bytes_transferred is a uint32_t; the products below may
       exceed 32 bits for large array lengths -- confirm the operand types. */
    bytes_transferred = 0;
    /* write_type_case 0 counts each written array twice, case 1 once; any
       other value leaves bytes_transferred at 0. */
    if (write_type_case == 0) {
      bytes_transferred = (numReadArraysPerThread + 2*numWriteArraysPerThread) * (numThreads * threadArrayLength * NUM_BYTES_PER_DOUBLE);
    }
    else if (write_type_case == 1) {
      bytes_transferred = (numReadArraysPerThread + numWriteArraysPerThread) * (numThreads * threadArrayLength * NUM_BYTES_PER_DOUBLE);
    }
    print_bandwidth_measurements(results, bytes_transferred, numThreads, NUM_TRIALS, median_counts_per_sec);
    printf("\n\n");
  }
#endif
  pthread_exit((void*) 0);
}
Beispiel #21
0
int main(int argc, char* argv[]) 
//int main(void) 
{
	double t_start, t_end;

	DATA_TYPE* A;
	DATA_TYPE* C;
	DATA_TYPE* C_outputFromGpu;
        if(argc==2){
          printf("arg 1 = %s\narg 2 = %s\n", argv[0], argv[1]);
          cpu_offset = atoi(argv[1]);
        }


	A = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
	C = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
	C_outputFromGpu = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));

	init_arrays(A, C);
	read_cl_file();
	cl_initialization_fusion();
	//cl_initialization();
	cl_mem_init(A, C);
	cl_load_prog();

	cl_launch_kernel();

	errcode = clEnqueueReadBuffer(clCommandQue[0], c_mem_obj, CL_TRUE, 0, M * N * sizeof(DATA_TYPE), C_outputFromGpu, 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");  

	t_start = rtclock();
	syrk(A, C);
	t_end = rtclock(); 
	fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);   
	compareResults(C, C_outputFromGpu);
	cl_clean_up();
	
	free(A);
	free(C);
	free(C_outputFromGpu);

	return 0;
}
Beispiel #22
0
int main(void) 
{
	double t_start, t_end;
	
	DATA_TYPE* _fict_;
	DATA_TYPE* ex;
	DATA_TYPE* ey;
	DATA_TYPE* hz;
	DATA_TYPE* hz_outputFromGpu;

	_fict_ = (DATA_TYPE*)malloc(TMAX*sizeof(DATA_TYPE));
	ex = (DATA_TYPE*)malloc(NX*(NY+1)*sizeof(DATA_TYPE));
	ey = (DATA_TYPE*)malloc((NX+1)*NY*sizeof(DATA_TYPE));
	hz = (DATA_TYPE*)malloc(NX*NY*sizeof(DATA_TYPE));
	hz_outputFromGpu = (DATA_TYPE*)malloc(NX*NY*sizeof(DATA_TYPE));
	
	int i;
	init_arrays(_fict_, ex, ey, hz);
	read_cl_file();
	cl_initialization();
	cl_mem_init(_fict_, ex, ey, hz);
	cl_load_prog();

	cl_launch_kernel();

	errcode = clEnqueueReadBuffer(clCommandQue, hz_mem_obj, CL_TRUE, 0, NX * NY * sizeof(DATA_TYPE), hz_outputFromGpu, 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");	

	t_start = rtclock();
	runFdtd(_fict_, ex, ey, hz);
	t_end = rtclock(); 
	fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);   
	compareResults(hz, hz_outputFromGpu);
	cl_clean_up();
	
	free(_fict_);
	free(ex);
	free(ey);
	free(hz);
	free(hz_outputFromGpu);
	
    	return 0;
}
Beispiel #23
0
/*
 * Call this function to simulate init_stuff() and populate the *_info arrays.
 *
 * Copies the three default paths into local 512-byte buffers, makes sure each
 * one ends with PATH_SEP, then hands them to init_file_paths() and calls
 * init_arrays().
 */
void read_edit_files(void) {
	char configpath[512], libpath[512], datapath[512];
	char *const paths[3] = { configpath, libpath, datapath };
	const char *const defaults[3] = {
		DEFAULT_CONFIG_PATH, DEFAULT_LIB_PATH, DEFAULT_DATA_PATH
	};
	int p;

	/* All three buffers have the same size, so one sizeof serves them all. */
	for (p = 0; p < 3; p++)
		my_strcpy(paths[p], defaults[p], sizeof(configpath));

	/* Force termination in the last byte of every buffer. */
	for (p = 2; p >= 0; p--)
		paths[p][sizeof(configpath) - 1] = '\0';

	/* Append the separator only where it is missing. */
	for (p = 0; p < 3; p++) {
		if (suffix(paths[p], PATH_SEP))
			continue;
		my_strcat(paths[p], PATH_SEP, sizeof(configpath));
	}

	init_file_paths(configpath, libpath, datapath);
	init_arrays();
}
Beispiel #24
0
/*
 * Main routine: performs the three one-time initialisation steps in order
 * (arrays, board and audio port, hardware interrupts) and then loops forever
 * calling wait_buffer().
 */
void main()
{
  init_arrays();      /* set up arrays */
  init_hardware();    /* initialise board and the audio port */
  init_HWI();         /* initialise hardware interrupts */

  /* Never returns: keep waiting for interrupts. */
  for (;;)
  {
    wait_buffer();
  }
}
Beispiel #25
0
/*
 * Entry point: initialises the arrays, then reads lines from stdin until one
 * of the mode commands is seen.  "uci" starts UCI(), "console" starts
 * console(); any other line is ignored.  Returns 0 when the chosen mode
 * returns or when stdin reaches end of file.
 *
 * Input is read with fgets() instead of gets(): gets() cannot bound the write
 * into str and was removed from the language in C11, and its NULL return at
 * end of file was ignored, which made the loop spin forever on closed input.
 */
int main()
{
    char str[100];

    init_arrays();

    /* Stop on EOF or read error instead of looping on stale buffer contents. */
    while (fgets(str, sizeof str, stdin) != NULL)
    {
        /* gets() dropped the trailing newline; do the same for fgets(). */
        str[strcspn(str, "\n")] = '\0';

        if(!strcmp(str, "uci"))
        {
            UCI();
            break;
        }
        if(!strcmp(str, "console"))
        {
            console();
            break;
        }
    }
    return 0;
}
Beispiel #26
0
int main(void) 
{
	double t_start, t_end;

	DATA_TYPE* A;
	DATA_TYPE* B;
	DATA_TYPE* C;
	DATA_TYPE* C_outputFromGpu;

	A = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
	B = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
	C = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
	C_outputFromGpu = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));

	init_arrays(A, B, C);
	read_cl_file();
	cl_initialization();
	cl_mem_init(A, B, C);
	cl_load_prog();

	cl_launch_kernel();

	errcode = clEnqueueReadBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, N*M*sizeof(DATA_TYPE), C_outputFromGpu, 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

	t_start = rtclock();
	syr2k(A, B, C);
	t_end = rtclock(); 
	fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);   
	compareResults(C, C_outputFromGpu);
	cl_clean_up();

	free(A);
	free(B);
	free(C);
	free(C_outputFromGpu);

	return 0;
}
Beispiel #27
0
/*
 * Timing harness for a time-stepped stencil over the global ex, ey and hz
 * arrays.  The kernel runs REPS times, each repetition timed with rtclock();
 * the mean time is printed and hz[0][0] (as int) is returned.
 */
int main()
{
  double annot_t_start = 0, annot_t_end = 0, annot_t_total = 0;
  int annot_i;

  init_arrays();

  for (annot_i = 0; annot_i < REPS; ++annot_i)
  {
    int step, row, col;

    annot_t_start = rtclock();

    for (step = 0; step < tmax; ++step)
      {
	/* Row 0 of ey is set to the current step number. */
	for (col = 0; col < ny; ++col)
	  ey[0][col] = step;

	/* ey update from the hz difference along the first index. */
	for (row = 1; row < nx; ++row)
	  for (col = 0; col < ny; ++col)
	    ey[row][col] -= 0.5*(hz[row][col]-hz[row-1][col]);

	/* ex update from the hz difference along the second index. */
	for (row = 0; row < nx; ++row)
	  for (col = 1; col < ny; ++col)
	    ex[row][col] -= 0.5*(hz[row][col]-hz[row][col-1]);

	/* hz update from the ex and ey differences. */
	for (row = 0; row < nx; ++row)
	  for (col = 0; col < ny; ++col)
	    hz[row][col] -= 0.7*(ex[row][col+1]-ex[row][col]+ey[row+1][col]-ey[row][col]);
      }

    annot_t_end = rtclock();
    annot_t_total += annot_t_end - annot_t_start;
  }

  annot_t_total /= REPS;
  printf("%f\n", annot_t_total);

  return ((int) hz[0][0]);

}
Beispiel #28
0
/*
 * Entry point of the "Texture Objects" demo: initialises GLUT, creates a
 * 300x300 double-buffered RGB window, initialises GLEW and the demo state,
 * registers the callbacks and enters the GLUT main loop.
 *
 * The window handle is now checked before glewInit() is called, so GLEW is
 * never initialised when window creation has failed (the original tested the
 * handle only after calling glewInit()).
 */
int main( int argc, char *argv[] )
{
   glutInit(&argc, argv);
   glutInitWindowPosition(0, 0);
   glutInitWindowSize(300, 300);
   glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );

   Window = glutCreateWindow("Texture Objects");
   if (!Window) {
      exit(1);
   }

   /* Called only once a window exists. */
   glewInit();

   init();
   init_arrays();

   glutReshapeFunc( reshape );
   glutKeyboardFunc( key );
   /* The idle callback is installed only when animation is enabled. */
   if (Anim)
      glutIdleFunc( idle );
   glutDisplayFunc( draw );
   glutMainLoop();
   return 0;
}
Beispiel #29
0
/*
 * Read a stimulus description file into the global experiment arrays.
 *
 * The file is consumed line by line in a fixed layout: a header ending in the
 * global-parameters marker line, one line of eight global parameters, the
 * colour marker and a palette line, the stimuli marker, and then
 * num_trials * num_stimuli stimulus lines.
 *
 * Returns 1 on success and 0 on any failure (file cannot be opened, early end
 * of file, a marker line of unexpected length, a line that does not parse, or
 * init_arrays() returning 0).  The file is closed on every path after a
 * successful open.
 */
int	read_stimuli(char *filename)
{
	FILE	*id;
	int	n,i;
	int	here;		/* running index into the per-stimulus arrays */
	int	inada;		/* sink for parsed fields that are not kept */
	int	line=0;		/* number of lines read so far, for error messages */
	char	buf[BUF_LEN+1];
	pchar	err_msg;	/* marker text reported by invalid_file_format */

	if (verbose) printf("Reading stimuli from \"%s\"...\n",filename);

	/* NOTE(review): the file is opened "r+" although only reads are visible
	   in this function -- confirm whether write access is really required. */
	if ((id=fopen(filename,"r+")) == (FILE *) 0) {
		puts("File does not exist");
		return 0;
	}
	/* Read five lines; the fifth stays in buf for the marker check below. */
	for (n=0;n<5;++n) {
		if (!fgets(buf,BUF_LEN,id)) 
			goto end_of_file;
		++line;
	}

	/* Marker lines are validated by comparing lengths only, not contents. */
	if (strlen(global_msg) != strlen(buf)) {
		err_msg=global_msg;
		goto invalid_file_format;
	}

	if (!fgets(buf,BUF_LEN,id))
		goto end_of_file;
	++line;

	/* Eight global parameters parsed with the global_str format. */
	if (sscanf(buf,global_str,&num_trials,&num_stimuli,&num_categories,
		&num_choices_per_category,&max_same_categories,
		&ms_btwn_stim,&ms_btwn_trials,&total_ms_on) != 8)
		goto unexpected_args;

	validate_args();
	if (!init_arrays()) {
		fclose(id);
		return 0;
	}

	/* Skip one line; the second one read is the colour marker. */
	for (n=0;n<2;++n) {
		if (!fgets(buf,BUF_LEN,id))
			goto end_of_file;
		++line;
	}
	if (strlen(color_msg) != strlen(buf)) {
		err_msg=color_msg;
		goto invalid_file_format;
	}
	if (!fgets(buf,BUF_LEN,id))
		goto end_of_file;
	++line;

	/* The palette line is always read, but parsed only in this mode. */
	if (get_colors_from == COLORS_FROM_STM_FILE) {
		if (sscanf(buf,color_str,&_palette[0],&_palette[1],&_palette[2],
			&_palette[3],&_palette[4],&_palette[5],&_palette[6]) != 7)
			goto unexpected_args;
	}
	/* Else use the ones parsed in from .cfg file, if at all */

	/* Skip one line; the second one read is the stimuli marker. */
	for (n=0,here=0;n<2;++n) {
		if (!fgets(buf,BUF_LEN,id))
			goto end_of_file;
		++line;
	}
	if (strlen(stm_msg) != strlen(buf)) {
		err_msg=stm_msg;
		goto invalid_file_format;
	}

	/* One line per stimulus; 'here' advances across all trials. */
	for (n=0,here=0;n<num_trials;++n) {
		for (i=0;i<num_stimuli;++i,++here) {
			if (!fgets(buf,BUF_LEN,id))
				goto end_of_file;
			++line;
			/* First two and last parsed fields are discarded into inada. */
			if (sscanf(buf,stm_str,&inada,&inada,
				&_color[here],&_shape[here],
				&_num[here],&_size[here],&_filled[here],&_pattern[here],
				&_matches[here],&inada) != 10)
				goto unexpected_args;

			/* BOUNDED is defined elsewhere; presumably it limits the value
			   to the given inclusive range -- confirm. */
			BOUNDED(_shape[here],0,num_choices_per_category-1);
			BOUNDED(_num[here],0,num_choices_per_category-1);
			BOUNDED(_size[here],0,num_choices_per_category-1);
			BOUNDED(_filled[here],0,num_choices_per_category-1);
			if(use_many_colors) {
				BOUNDED(_color[here],0,MAX_COLORS-1);
				BOUNDED(_pattern[here],0,MAX_PATTERNS-1);
			}
			else {
				BOUNDED(_color[here],0,num_choices_per_category-1);
				BOUNDED(_pattern[here],0,num_choices_per_category-1);
			}
			_Orig_stim[here] = item_val(here);	// for randomization purposes
		}
	}
	fclose(id);

	calc_matches();

	return 1;

/* Parse failure: the message is printed only in verbose mode. */
unexpected_args:
	if (verbose) printf("Unexpected args on line %i\n", line);
	fclose(id);
	return 0;

end_of_file:
	puts("File ended too early");
	fclose(id);
	return 0;

/* Marker mismatch: reports the line number and the expected marker text. */
invalid_file_format:
	printf("Invalid file format.  Expects as line %i:\n",line);
	puts(err_msg);
	fclose(id);
	return 0;
}
Beispiel #30
0
/*
 * Read one subject record from an already-open data file.
 *
 * id   - stream positioned at (or, on the first call, just before) a record
 * line - line number of the current position, used for error messages
 *
 * The record consists of a 23-line header (parsed by position in the switch
 * below), the stimulus lines, a response-data section, and trailing analysis
 * text that is skipped until the next line starting with "Name:".
 *
 * Returns the updated line number when another record follows, or -1 on end
 * of file or on any parse error (after printing the offending line number).
 */
int	read_data(FILE *id,int line)
{
	/* NOTE(review): 'init' is declared without a type and relies on implicit
	   int, which C99 and C++ reject -- confirm the intended language level. */
	static	init=0;
	int	n,i;
	int	here;		/* running index into the per-stimulus arrays */
	int	len;
	int	inada;		/* sink for parsed fields that are not kept */
	char	Lbuf[BUF_LEN+1];

	/* Only the very first call reads a line here; later calls enter the
	   header loop without filling Lbuf first. */
	if (!init) {
		init=1;
		if (!fgets(Lbuf,BUF_LEN,id))
			goto error;
	}

	/* Header: n selects how the line currently in Lbuf is interpreted; the
	   next line is fetched at the bottom of each iteration. */
	for (n=0;n<23;++n,++line) {
		switch(n) {
			/* Cases 0-2 copy the text after a 6-character prefix and
			   overwrite the last character with a terminator.
			   NOTE(review): len-1 is an invalid index if the copied
			   string is empty -- confirm input guarantees. */
			case 0: strcpy(user_name,&Lbuf[6]); len=strlen(user_name); user_name[len-1]='\0'; break;
			case 1: strcpy(date_str,&Lbuf[6]); len=strlen(date_str); date_str[len-1]='\0'; break;
			case 2: strcpy(time_str,&Lbuf[6]); len=strlen(time_str); time_str[len-1]='\0'; break;
			case 3: break;
			case 4:
				/* No "Sex:" line: mark the subject as a returning player
				   and continue parsing at header position 12. */
				if (strncmp(Lbuf,"Sex:",4)!=0) {
					n= 12;
					played_before=1;
					goto Parse_Exp_Type;
				}
				if (sscanf(Lbuf,"Sex: %i\n", &sex)!=1)
					goto error;
				played_before=0;
				break;
			case 5: if (sscanf(Lbuf,"Age: %i\n", &age)!=1) goto error; break;
			case 6: if (sscanf(Lbuf,"Years_ed: %i\n", &years_ed)!=1) goto error; break;
			case 7: break;
			case 8: break;
			case 9: if (sscanf(Lbuf,"  Family data: %i %i %i %i %i %i\n",
				&familyH[0],&familyH[1],&familyH[2],&familyH[3],&familyH[4],&familyH[5])!=6)
					goto error;
				break;
			case 10: if (sscanf(Lbuf,"  Briggs data: %i %i %i %i %i %i %i %i %i %i %i %i\n",
				&indivH[0],&indivH[1],&indivH[2],&indivH[3],&indivH[4],&indivH[5],
				&indivH[6],&indivH[7],&indivH[8],&indivH[9],&indivH[10],&indivH[11])!=12)
					goto error;
				break;
			case 11: break;
/* Jump target for records without the demographic lines (see case 4). */
Parse_Exp_Type:
			case 12: if (sscanf(Lbuf,"Experiment Type: %s",exp_type_str)!=1) goto error; break;
			case 13: strcpy(selfeval_str,&Lbuf[11]); len=strlen(selfeval_str); selfeval_str[len-1]='\0'; break;
			case 14: break;
			case 15: break;
			case 16: if (sscanf(Lbuf,global_str,
				&num_trials, &num_stimuli, &num_categories,&num_choices_per_category,
				&max_same_categories,&ms_btwn_stim,&ms_btwn_trials,&total_ms_on) != 8)
					goto error;
				break;
			case 17: break;
			case 18: break;
			case 19: if (sscanf(Lbuf,color_str,&_palette[0],&_palette[1],&_palette[2],
				&_palette[3],&_palette[4],&_palette[5],&_palette[6]) != 7)
					goto error;
				break;
			case 20: break;
			case 21: break;	// **STIMULI**
			case 22:	if (strcmp(Lbuf,stm_msg)!=0) goto error; break;
		}
		if (!fgets(Lbuf,BUF_LEN,id))
			goto error;
	}

	validate_args();
	if (!init_arrays())
		goto error;

	/** Read STIMULI **/
	/* Lbuf already holds the first stimulus line; each iteration parses the
	   current line and then reads the next one. */
	for (n=0,here=0;n<num_trials;++n) {
		for (i=0;i<num_stimuli;++i,++here) {
			++line;
			if (sscanf(Lbuf,stm_str,&inada,&inada,
				&_color[here],&_shape[here],
				&_num[here],&_size[here],&_filled[here],&_pattern[here],
				&_matches[here],&inada) != 10)
				goto error;
			if (!fgets(Lbuf,BUF_LEN,id))
				goto error;
		}
	}

	/** Read RESPONSE DATA **/
	/* Two more lines are read; the second must equal the data marker. */
	for (n=0;n<2;++n,++line)
		if (!fgets(Lbuf,BUF_LEN,id))
			goto error;

	if (strcmp(Lbuf,dat_msg)!=0)
		goto error;

	/* One response line per trial; only the choice, the two match bounds and
	   the time are kept. */
	for (n=0;n<num_trials;++n,++line) {
		if (!fgets(Lbuf,BUF_LEN,id))
			goto error;
	 	if (sscanf(Lbuf,dat_str,
			&inada,&_choice[n],
			&inada,&inada,&inada,&inada,&inada,&inada,&inada,	// item description
			&inada,	// item_number
			&_match_min[n],
			&_match_max[n],
			&_time[n]) != 13)
		 		goto error;
	}

	/** Read and discard previous analysis, up to start of appended file **/
	while(1) {
		if (!fgets(Lbuf,BUF_LEN,id))
			return -1;	// since end of file
		if (strncmp(Lbuf,"Name:",5)==0)
			break;
		++line;
	}
	return line;	// so can print error messages in appended file

error:
	printf("Unexpected args on line %i\n", line);
	return -1;
}