Beispiel #1
0
int main(int argc, char *argv[]) {

     double a[MAXVSIZE], b[MAXVSIZE], c[MAXVSIZE];
     int i,n;
     long long before, after;
     float rtime, ptime, mflops;
     long long flpops; 

     if (PAPI_VER_CURRENT != 
		PAPI_library_init(PAPI_VER_CURRENT))
	ehandler("PAPI_library_init error.");


     printf("Enter vector size:  ");
     scanf("%d",&n);

     for (i=0;i<n;i++) {
       a[i] = i;
       b[i] = n-i;
     }

     PAPI_flops(&rtime, &ptime, &flpops, &mflops);
     loop(c,a,b,n);
     PAPI_flops(&rtime, &ptime, &flpops, &mflops);


     printf("Number of floatinig point operations = %lld\n",flpops);
     printf("mflops = %f\n",mflops);
     return 0;

}
Beispiel #2
0
int main(int argc, char **argv) {
    extern void dummy(void *);
    float matrixa[INDEX][INDEX], matrixb[INDEX][INDEX], mresult[INDEX][INDEX];
    float real_time, proc_time, mflops;
    long long flpins;
    int retval;
    int i,j,k;


    /* Initialize the Matrix arrays */
    for ( i=0; i<INDEX*INDEX; i++ ) {
        mresult[0][i] = 0.0;
        matrixa[0][i] = matrixb[0][i] = rand()*(float)1.1;
    }

    /* Setup PAPI library and begin collecting data from the counters */
    if((retval=PAPI_flops( &real_time, &proc_time, &flpins, &mflops))<PAPI_OK)
        test_fail(__FILE__, __LINE__, "PAPI_flops", retval);

    /* Matrix-Matrix multiply */
    for (i=0; i<INDEX; i++)
        for(j=0; j<INDEX; j++)
            for(k=0; k<INDEX; k++)
                mresult[i][j]=mresult[i][j] + matrixa[i][k]*matrixb[k][j];

    /* Collect the data into the variables passed in */
    if((retval=PAPI_flops( &real_time, &proc_time, &flpins, &mflops))<PAPI_OK)
        test_fail(__FILE__, __LINE__, "PAPI_flops", retval);

    printf("Real_time:\t%f\nProc_time:\t%f\nTotal flpins:\t%lld\nMFLOPS:\t\t%f\n",
           real_time, proc_time, flpins, mflops);
    printf("%s\tPASSED\n", __FILE__);
    PAPI_shutdown();
    exit(0);
}
Beispiel #3
0
void 
sort_profiler_t::flops_papi() {
#ifdef HAVE_PAPI
	int 		retval;
	float rtime, ptime, mflops;
	retval  = PAPI_flops(&rtime, &ptime, &_pri_p_flpops, &mflops);
	// assert (retval == PAPI_OK);
#else
	_pri_p_flpops =   0;
#endif	
}
Beispiel #4
0
void
sc_flops_papi (float *rtime, float *ptime, long long *flpops, float *mflops)
{
#ifdef SC_PAPI
    int                 retval;

    retval = PAPI_flops (rtime, ptime, flpops, mflops);
    SC_CHECK_ABORT (retval == PAPI_OK, "Papi not happy");
#else
    *rtime = *ptime = *mflops = 0.;
    *flpops = 0;
#endif
}
Beispiel #5
0
void pmm_timer_stop() {

#ifdef HAVE_PAPI
    int ret;

    ret = PAPI_flops(&papi_realtime, &papi_usedtime, &papi_complexity,
                     &papi_mflops);

    if(ret != PAPI_OK) {
        printf("Could not finalize PAPI_flops\n");
        printf("Your platform may not support the floating point operation.\n");
        exit(PMM_EXIT_GENFAIL);
    }
#else
    gettimeofday(&realtime_end, NULL);
#endif

    getrusage(RUSAGE_SELF, &usage_end);

    return;
}
Beispiel #6
0
int main( int argc, char *argv[] )
{
    unsigned iter;
    FILE *infile, *resfile;
    char *resfilename;

	double *tmp;

    // algorithmic parameters
    algoparam_t param;
    int np;

    double runtime, flop;
    double residual;

    // check arguments
    if( argc < 2 )
    {
	usage( argv[0] );
	return 1;
    }

    // check input file
    if( !(infile=fopen(argv[1], "r"))  ) 
    {
	fprintf(stderr, 
		"\nError: Cannot open \"%s\" for reading.\n\n", argv[1]);
      
	usage(argv[0]);
	return 1;
    }
  

    // check result file
    resfilename= (argc>=3) ? argv[2]:"heat.ppm";

    if( !(resfile=fopen(resfilename, "w")) )
    {
	fprintf(stderr, 
		"\nError: Cannot open \"%s\" for writing.\n\n", 
		resfilename);
      
	usage(argv[0]);
	return 1;
    }

    // check input
    if( !read_input(infile, &param) )
    {
	fprintf(stderr, "\nError: Error parsing input file.\n\n");
      
	usage(argv[0]);
	return 1;
    }

    print_params(&param);

    // set the visualization resolution
    param.visres = param.max_res;

    param.u     = 0;
    param.uhelp = 0;
    param.uvis  = 0;

    param.act_res = param.initial_res;

    #ifdef USEPAPI
	    float rtime, ptime, mflops;
	    long_long flpops;

	    PAPI_flops( &rtime, &ptime, &flpops, &mflops );
    #endif

    // loop over different resolutions
    while(1) {

	// free allocated memory of previous experiment
	if (param.u != 0)
	    finalize(&param);

	if( !initialize(&param) )
	{
	    fprintf(stderr, "Error in Jacobi initialization.\n\n");

	    usage(argv[0]);
	}

	fprintf(stderr, "Resolution: %5u\r", param.act_res);

	// full size (param.act_res are only the inner points)
	np = param.act_res + 2;
    
	// starting time
	runtime = wtime();
	residual = 999999999;

	iter = 0;
	while(1) {

	    switch( param.algorithm ) {

		case 0: // JACOBI
      
		    residual = relax_jacobi_return_residual(param.u, param.uhelp, np, np);
			tmp = param.u;
			param.u = param.uhelp;
			param.uhelp = tmp;
		    break;

		case 1: // GAUSS

		    relax_gauss(param.u, np, np);
		    residual = residual_gauss( param.u, param.uhelp, np, np);
		    break;
	    }
	    
	    iter++;

	    // solution good enough ?
	    if (residual < 0.000005) break;

	    // max. iteration reached ? (no limit with maxiter=0)
	    if (param.maxiter>0 && iter>=param.maxiter) break;
	    
	    if (iter % 100 == 0)
		fprintf(stderr, "residual %f, %d iterations\n", residual, iter);
	}

	// Flop count after <i> iterations
	flop = iter * 7.0 * param.act_res * param.act_res;
	// stopping time
	runtime = wtime() - runtime;

	fprintf(stderr, "Resolution: %5u, ", param.act_res);
	fprintf(stderr, "Time: %04.3f ", runtime);
	fprintf(stderr, "(%3.3f GFlop => %6.2f MFlop/s, ", 
		flop/1000000000.0,
		flop/runtime/1000000);
	fprintf(stderr, "residual %f, %d iterations)\n", residual, iter);

	#ifdef USEPAPI
		PAPI_flops( &rtime, &ptime, &flpops, &mflops );
		fprintf(stderr, "PAPI MFlop/s: %6.2f \n", 
			mflops);
	#endif

	// for plot...
	printf("%5d %f\n", param.act_res, flop/runtime/1000000);

	if (param.act_res + param.res_step_size > param.max_res) break;
	param.act_res += param.res_step_size;
    }

/*
    coarsen( param.u, np, np,
	     param.uvis, param.visres+2, param.visres+2 );
  
    write_image( resfile, param.uvis,  
		 param.visres+2, 
		 param.visres+2 );
*/
    finalize( &param );

    return 0;
}
Beispiel #7
0
int main(int argc, char *argv[]) {


	float rtime1, rtime2, ptime1, ptime2, mflops;
	long long flpops;

	unsigned long int tid;
	int num_hwcntrs = 0;
	int fip = 0, retval;
	float real_time, proc_time;
	long long flpins;

	int i;
	unsigned int EventSet = PAPI_NULL; 
    int count = 0, err_count = 0;


    PAPI_event_info_t info;

    long long ( values2[2] )[2];
    long long min, max;
    int PAPI_event, mythreshold = THRESHOLD;
    char event_name1[PAPI_MAX_STR_LEN];
    const PAPI_hw_info_t *hw_info = NULL;
    int num_events, mask;
    int num_flops = NUM_FLOPS;
    long long elapsed_us, elapsed_cyc;



tests_quiet( argc, argv );  /* Set TESTS_QUIET variable */



    retval = PAPI_library_init( PAPI_VER_CURRENT );
    if ( retval != PAPI_VER_CURRENT )
      test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );

  retval = PAPI_create_eventset( &EventSet );
  if ( retval != PAPI_OK )
      test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval );

	/* Get hardware info */
  hw_info = PAPI_get_hardware_info(  );
  if ( hw_info == NULL )
      test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 );

  EventSet = 	add_two_nonderived_events( &num_events, &PAPI_event, &mask );

  printf("Using %#x for the overflow event\n",PAPI_event);

  if ( PAPI_event == PAPI_FP_INS ) {
      mythreshold = THRESHOLD;
  }
  else {
		#if defined(linux)
      mythreshold = ( int ) hw_info->cpu_max_mhz * 20000;
		#else
      mythreshold = THRESHOLD * 2;
		#endif
  }

  retval = PAPI_start( EventSet );
  if ( retval != PAPI_OK )
      test_fail( __FILE__, __LINE__, "PAPI_start", retval );

  do_flops( NUM_FLOPS );

	/* stop the calibration run */
  retval = PAPI_stop( EventSet, values2[0] );
  if ( retval != PAPI_OK )
      test_fail( __FILE__, __LINE__, "PAPI_stop", retval );


	/* set up overflow handler */
  retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, handler );
  if ( retval != PAPI_OK ) {
      test_fail( __FILE__, __LINE__, "PAPI_overflow", retval );
  }

	/* Start overflow run */
  retval = PAPI_start( EventSet );
  if ( retval != PAPI_OK ) {
      test_fail( __FILE__, __LINE__, "PAPI_start", retval );
  }

  do_flops( num_flops );

	/* stop overflow run */
  retval = PAPI_stop( EventSet, values2[1] );
  if ( retval != PAPI_OK )
      test_fail( __FILE__, __LINE__, "PAPI_stop", retval );
  retval = PAPI_overflow( EventSet, PAPI_event, 0, 0, handler );
  if ( retval != PAPI_OK )
      test_fail( __FILE__, __LINE__, "PAPI_overflow", retval );

  if ( !TESTS_QUIET ) {
      if ( ( retval =
         PAPI_event_code_to_name( PAPI_event, event_name1 ) ) != PAPI_OK )
         test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval );

     printf( "Test case: Overflow dispatch of 2nd event in set with 2 events.\n" );
     printf( "---------------------------------------------------------------\n" );
     printf( "Threshold for overflow is: %d\n", mythreshold );
     printf( "Using %d iterations\n", num_flops );
     printf( "-----------------------------------------------\n" );

     printf( "Test type    : %16d%16d\n", 1, 2 );
     printf( OUT_FMT, event_name1, ( values2[0] )[1], ( values2[1] )[1] );
     printf( OUT_FMT, "PAPI_TOT_CYC", ( values2[0] )[0], ( values2[1] )[0] );
     printf( "Overflows    : %16s%16d\n", "", total );
     printf( "-----------------------------------------------\n" );
 }

 retval = PAPI_cleanup_eventset( EventSet );
 if ( retval != PAPI_OK )
  test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval );

retval = PAPI_destroy_eventset( &EventSet );
if ( retval != PAPI_OK )
  test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval );

if ( !TESTS_QUIET ) {
  printf( "Verification:\n" );
#if defined(linux) || defined(__ia64__) || defined(_POWER4)
  num_flops *= 2;
#endif
  if ( PAPI_event == PAPI_FP_INS || PAPI_event == PAPI_FP_OPS ) {
     printf( "Row 1 approximately equals %d %d\n", num_flops, num_flops );
 }
 printf( "Column 1 approximately equals column 2\n" );
 printf( "Row 3 approximately equals %u +- %u %%\n",( unsigned ) ( ( values2[0] )[1] / ( long long ) mythreshold ),( unsigned ) ( OVR_TOLERANCE * 100.0 ) );
}

min =
( long long ) ( ( ( double ) values2[0][1] * ( 1.0 - OVR_TOLERANCE ) ) /
  ( double ) mythreshold );
max =
( long long ) ( ( ( double ) values2[0][1] * ( 1.0 + OVR_TOLERANCE ) ) /
  ( double ) mythreshold );
printf( "Overflows: total(%d) > max(%lld) || total(%d) < min(%lld) \n", total,
  max, total, min );
if ( total > max || total < min )
  test_fail( __FILE__, __LINE__, "Overflows", 1 );



printf("Initial thread id is: %lu\n",tid);

	/* Initialize the PAPI library and get the number of counters available */

if ((num_hwcntrs = PAPI_num_counters()) <= 0)  
  handle_error(1);



  /*  The installation supports PAPI, but has no counters */
if ((num_hwcntrs = PAPI_num_counters()) == 0 )
    fprintf(stderr,"Info:: This machine does not provide hardware counters.");

printf("This system has %d available counters.\n", num_hwcntrs);

if (num_hwcntrs > 2)
  num_hwcntrs = 2;

	 /* Start counting events */




if (PAPI_start_counters(Events, num_hwcntrs) != PAPI_OK)
  handle_error(1);

if (argc != 8) {
  printf("\nError :: Ejecutar como : a.out archivo_BD Num_elem archivo_queries Num_queries N_THREADS numero_K Dimension_objetos\n");
  return 0;
}
TOPK = atoi(argv[6]);
DIM = atoi(argv[7]);
double **DB;
	double **Consultas; //Cola de consultas
	int N_QUERIES, N_DB;
	char str_f[256];
	double dato[DIM];
	int j;
	FILE *f_dist, *fquery;
	Elem *heap, e_temp,*answer;
	int *acum, N_THREADS;


	//N_THREADS es el nro. de threads con el que se lanzará la región paralela
	N_THREADS = atoi(argv[5]);
	//N_QUERIES es el nro. de consultas
	N_QUERIES = atoi(argv[4]);
	N_DB = atoi(argv[2]);

	printf("\nN_QUERIES = %d\nN_THREADS = %d\n", N_QUERIES, N_THREADS);
	fflush(stdout);

	acum = (int *) malloc(sizeof (int)*N_THREADS);
	for (i = 0; i < N_THREADS; i++)
		acum[i] = 0;

	sprintf(str_f, "%s", argv[1]);
	printf("\nAbriendo %s... ", argv[1]);
	fflush(stdout);
	f_dist = fopen(str_f, "r");
	printf("OK\n");
	fflush(stdout);


	Consultas = (double **) malloc(sizeof (double *)*N_QUERIES);
	for (i = 0; i < N_QUERIES; i++)
		Consultas[i] = (double *) malloc(sizeof (double)*DIM);

	DB = (double **) malloc(sizeof (double *)*N_DB);
	for (i = 0; i < N_DB; i++)
		DB[i] = (double *) malloc(sizeof (double)*DIM);

	answer = (Elem *)malloc(sizeof(Elem)*N_QUERIES*TOPK);

	printf("\nCargando DB... ");
	fflush(stdout);
	for (i = 0; i < N_DB; i++) {
		//Usar leedato_cophir() cuando se utilice la BD Cophir para no tener problemas con las ","
		//if (leedato_cophir(dato, f_dist) == ERROR || feof(f_dist))
		if (leedato(dato, f_dist) == ERROR || feof(f_dist)) {
			printf("\n\nERROR :: N_DB mal establecido\n\n");
			fflush(stdout);
			fclose(f_dist);
			break;
		}
		copiavalor(DB[i], dato);
	}
	fclose(f_dist);
	printf("OK\n");
	fflush(stdout);

	if ((fquery = fopen(argv[3], "r")) == NULL)
		printf("Error al abrir para lectura el archivo de qeuries: %s\n", argv[3]);
	else
		printf("Abriendo  para lectura %s\n", argv[3]);
	printf("\nCargando Consultas... ");
	fflush(stdout);
	for (i = 0; i < N_QUERIES; i++) {
		//Usar leedato_cophir() cuando se utilice la BD Cophir para no tener problemas con las ","
		//if (leedato_cophir(dato, fquery) == ERROR || feof(fquery))
		if (leedato(dato, fquery) == ERROR || feof(fquery)) {
			printf("\n\nERROR :: N_QUERIES mal establecido, Menos queries que las indicadas\n\n");
			fflush(stdout);
			fclose(fquery);
			break;
		}
		copiavalor(Consultas[i], dato);
	}
	fclose(fquery);
	printf("OK\n");
	fflush(stdout);

	PAPI_start_counters((int*) Events, NUM_EVENTS);
	omp_set_num_threads(N_THREADS);

	elapsed_us = PAPI_get_real_usec(  );

	elapsed_cyc = PAPI_get_real_cyc(  );

	retval =
	PAPI_thread_init( ( unsigned
		long ( * )( void ) ) ( omp_get_thread_num ) );
	if ( retval != PAPI_OK ) {
		if ( retval == PAPI_ECMP )
			test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval );
		else
			test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval );
	}

#pragma omp parallel shared(Consultas, DB, N_QUERIES, N_DB, N_THREADS, acum, DIM)
	{
		float real_time;
		struct timeval t1, t2;
		int i, j;
		Elem *heap, e_temp;
		double d;
		int n_elem = 0;
		int trid = omp_get_thread_num(); //ID del thread
		int procs = omp_get_num_threads(); //Nro. total de threads
		double suma = 0;

		suma = 0;
		heap = (Elem *) malloc(sizeof (Elem) * TOPK);

#pragma omp barrier

#pragma omp master
		{
			gettimeofday(&t1, 0);
		}

		//Cada hilo accede a un subconjunto de las consultas. Cada hio accede de manera circular al arreglo de consultas.
        for (i = trid; i < N_QUERIES; i += procs) {
         n_elem = 0;
         for (j = 0; j < N_DB; j++) {

            d = distancia(Consultas[i], DB[j]);
				//Si la distancia del objeto a la consulta es menor que la raíz del heap, entonces se inserta en el heap. La raíz siempre mantiene la mayor de las distancias

            if(n_elem<TOPK){
               e_temp.dist = d;
               e_temp.ind = j;
               inserta2(heap, &e_temp, &n_elem);
           }
           if (n_elem==TOPK){
               if (d < topH(heap, &n_elem)) {
                  e_temp.dist = d;
                  e_temp.ind = j;
					//Si el heap no está lleno, se inserta el elemento
                  if (n_elem < TOPK)
                     inserta2(heap, &e_temp, &n_elem);
						//Si el heap está lleno, se inserta el elemento nuevo y se saca el que era antes de mayor de distancia. popush2() hace las operaciones de sacar el elemento mayor e insertar el nuevo.
                 else
                     popush2(heap, &n_elem, &e_temp);
             }}
         }

			//En este punto del código se tienen los K elemntos más cercanos a la consulta en 'heap'. Se pueden extraer con extraer2()
         for (j = 0; j < TOPK ; j++) {
           extrae2(heap, &n_elem, &e_temp);
           answer[i*TOPK+j].ind = e_temp.ind;
           answer[i*TOPK+j].dist = e_temp.dist;
       }
			//Realizamos una operación con los resultados para que el compilador no evite hacer instrucciones que considere que el usuario no utiliza. Simplemente cada hilo suma las distancias de los elementos mas cercanos a la consulta 
   }
   Thread( 1000000 * ( tid + 1 ) );



   fflush(stdout);

#pragma omp barrier

#pragma omp master
   {   

    if ( fip > 0 ) {
		/* Setup PAPI library and begin collecting data from the counters */
       if ( fip == 1 ) {
          if ( ( retval =
             PAPI_flips( &real_time, &proc_time, &flpins,
                &mflops ) ) < PAPI_OK )
             test_fail( __FILE__, __LINE__, "PAPI_flips", retval );
     } else {
      if ( ( retval =
         PAPI_flops( &real_time, &proc_time, &flpins,
            &mflops ) ) < PAPI_OK )
         test_fail( __FILE__, __LINE__, "PAPI_flops", retval );
 }

 gettimeofday(&t2, 0);
 real_time = (t2.tv_sec - t1.tv_sec) + (float) (t2.tv_usec - t1.tv_usec) / 1000000;

 Salida_Multihilo = fopen("Salida_Multihilo.txt", "w");
 for (i = 0; i < N_QUERIES; ++i){
  fprintf(Salida_Multihilo, "Consulta id:: %d\n",i);
  for (j = 0; j < TOPK; ++j){
     fprintf(Salida_Multihilo,"ind = %d :: dist = %f\n",answer[(i*TOPK)+j].ind,answer[(i*TOPK)+j].dist);
 }
 fprintf(Salida_Multihilo, "---------------------------------\n");
}
fclose(Salida_Multihilo);

printf("\n\nK = %d", TOPK);
printf("\nReal Time = %f segundos.\n", real_time);
fflush(stdout);


if ( fip == 1 ) {
  if ( ( retval =
     PAPI_flips( &real_time, &proc_time, &flpins,
        &mflops ) ) < PAPI_OK )
     test_fail( __FILE__, __LINE__, "PAPI_flips", retval );
} else {
  if ( ( retval =
     PAPI_flops( &real_time, &proc_time, &flpins,
        &mflops ) ) < PAPI_OK )
     test_fail( __FILE__, __LINE__, "PAPI_flops", retval );
}

if ( !TESTS_QUIET ) {
  if ( fip == 1 ) {
     printf( "Real_time: %f Proc_time: %f Total flpins: ", real_time,
        proc_time );
 } else {
     printf( "Real_time: %f Proc_time: %f Total flpops: ", real_time,
        proc_time );
 }
 printf( LLDFMT, flpins );
 printf( " MFLOPS: %f\n", mflops );
}
}

}
free(heap);



	}//end pragma omp parallel

	elapsed_cyc = PAPI_get_real_cyc(  ) - elapsed_cyc;
	elapsed_us = PAPI_get_real_usec(  ) - elapsed_us;

	if ( !TESTS_QUIET ) {
		printf( "Master real usec   : \t%lld\n", elapsed_us );
		printf( "Master real cycles : \t%lld\n", elapsed_cyc );
	}

	const PAPI_hw_info_t *hwinfo = NULL;
	const PAPI_mh_tlb_info_t *mhinfo = NULL;
	const  PAPI_mh_cache_info_t *mhcacheinfo = NULL;
	const PAPI_mh_level_t *mhlevel = NULL;


	if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT)
		exit(1);
	if ((hwinfo = PAPI_get_hardware_info()) == NULL)
		exit(1);	
	if ((mhinfo = PAPI_get_hardware_info()) == NULL)
		exit(1);
	if ((mhcacheinfo = PAPI_get_hardware_info()) == NULL)
		exit(1);
	if ((mhlevel = PAPI_get_hardware_info()) == NULL)
		exit(1);

	printf("\n\nA continuación información actual del equipo\n\n");

	printf("MH Type %d - Num entries %d  - Associativity %d \n",mhinfo->type, mhinfo->num_entries, mhinfo->associativity);
	printf("Cache MH type %d size %d line size %d num_lines %d Associativity %d\n\n",mhcacheinfo->type, mhcacheinfo->size,mhcacheinfo->line_size, mhcacheinfo->num_lines, mhcacheinfo->associativity);



    retval=papi_print_header("Available PAPI preset and user defined events plus hardware information.\n",&hwinfo );


    printf("Total hardware flops = %lld\n",(float)values[1]);
    printf("L2 data cache misses is %lld\n", values[0]);






    retval = PAPI_stop_counters(values, NUM_EVENTS);
    return 0;
}