示例#1
0
/*
 * Times one scalar double-precision addition and reports the rate that
 * perf_mflops() derives for it.
 *
 * perf_diff() is called with both timestamps and only the second one is then
 * handed to perf_mflops(), so the elapsed time is presumably stored in that
 * second argument (helpers are defined outside this block).
 */
int main(){
  perf_t t_begin, t_end;
  long flop_count = 1;  // exactly one addition sits inside the timed region
  double lhs = 1;
  double rhs = 1;
  double sum;
  double mflops;

  // Dry runs, potential cache flush, ...

  // Timed region: a single scalar addition
  perf(&t_begin);
  sum = lhs + rhs;
  perf(&t_end);

  // Check the result
  printf("%lf = %lf + %lf\n", sum, lhs, rhs);

  // Report the measured rate
  perf_diff(&t_begin, &t_end);
  mflops = perf_mflops(&t_end, flop_count);
  printf("Mflop/s : %lf \n", mflops);

  return 0;
}
示例#2
0
文件: driver.c 项目: Moeryn/PRCD-TP4
/*
 * Benchmarks a single cblas_ddot() call on two vectors of 100000 doubles and
 * prints the dot product followed by the rate computed by perf_mflops().
 *
 * The flop count passed to perf_mflops() is 2 * size (one multiply and one
 * add per element). If either vector cannot be allocated, a message is
 * written to stderr and the function returns without running the benchmark.
 */
void test_question_five(){
  double performance;
  perf_t start,stop;

  int size = 100000;

  double *A = malloc(sizeof *A * size);
  double *B = malloc(sizeof *B * size);

  // Do not hand NULL buffers to matrix_init()/cblas_ddot().
  if (A == NULL || B == NULL) {
    fprintf(stderr, "test_question_five: cannot allocate two vectors of %d doubles\n", size);
    free(A);
    free(B);
    return;
  }

  matrix_init(A,size,1);
  matrix_init(B,size,1);

  long flop = 2*size;

  // Dry runs, potential cache flush, ...

  // Timed region: one dot product over the full vectors
  perf(&start);
  double res = cblas_ddot(size,A,1,B,1);
  perf(&stop);

  // Check the result
  printf("Result %lf\n",res);

  // Report the measured rate
  perf_diff(&start, &stop);
  performance = perf_mflops(&stop, flop);
  printf("Mflop/s : %lf \n", performance);

  free(A);
  free(B);
}
示例#3
0
文件: driver.c 项目: Moeryn/PRCD-TP4
/*
 * Sweeps cblas_ddot() over growing vector lengths and prints one
 * "<length> <rate>" line per measurement, where the rate is the value
 * returned by perf_mflops() for 2 * length flops.
 *
 * Lengths start at 50 and grow by 25% (rounded down) each step, up to and
 * including 1000000 when the sequence hits it. All measurements read prefixes
 * of the same two buffers. If either buffer cannot be allocated, a message is
 * written to stderr and the function returns without measuring anything.
 */
void test_question_six(){
  double performance;
  perf_t start,stop;

  int size = 1000000;
  double *A = malloc(sizeof *A * size);
  double *B = malloc(sizeof *B * size);

  // Do not hand NULL buffers to matrix_init()/cblas_ddot().
  if (A == NULL || B == NULL) {
    fprintf(stderr, "test_question_six: cannot allocate two vectors of %d doubles\n", size);
    free(A);
    free(B);
    return;
  }

  matrix_init(A,size,1);
  matrix_init(B,size,1);

  int size_temp = 50;
  long flop;

  while(size_temp <= size){
    flop = 2*size_temp;

    // Timed region: one dot product over the first size_temp elements.
    // The numerical result is not needed, only the elapsed time.
    perf(&start);
    (void) cblas_ddot(size_temp,A,1,B,1);
    perf(&stop);

    perf_diff(&start, &stop);
    performance = perf_mflops(&stop, flop);
    printf("%d %lf \n",size_temp, performance);

    // Grow by 25%; integer division yields the same values as truncating
    // 1.25 * size_temp, without a round trip through double.
    size_temp += size_temp / 4;
  }

  free(A);
  free(B);
}
示例#4
0
文件: main.c 项目: Moeryn/LU-MPI
/*
 * Allocates an uninitialised size x size matrix of doubles for the benchmarks
 * in main(). The caller owns the returned buffer and releases it with free().
 *
 * The largest benchmark asks for 20000 x 20000 doubles, so a failed
 * allocation is a realistic outcome; in that case (or for a non-positive or
 * overflowing size) a message is written to stderr and the program exits
 * with EXIT_FAILURE instead of passing NULL on to the kernels.
 */
static double *bench_alloc_square(int size){
  double *matrix;
  size_t n;

  if (size <= 0) {
    fprintf(stderr, "\nInvalid matrix size %d\n", size);
    exit(EXIT_FAILURE);
  }

  n = (size_t) size;
  /* Make sure n * n * sizeof(double) is representable in size_t. */
  if (n > ((size_t) -1) / sizeof *matrix / n) {
    fprintf(stderr, "\nMatrix size %d is too large\n", size);
    exit(EXIT_FAILURE);
  }

  matrix = malloc(sizeof *matrix * n * n);
  if (matrix == NULL) {
    fprintf(stderr, "\nCould not allocate a %dx%d matrix of doubles\n", size, size);
    exit(EXIT_FAILURE);
  }
  return matrix;
}

/*
 * Benchmark driver: times the project's TRSM and LU kernels and prints the
 * rates returned by perf_mflops().
 *
 * Sections, in order:
 *   I.   cblas_dtrsm versus trsm_l on 4000x4000 matrices.
 *   II.  simple_lu on a 4000x4000 matrix.
 *   III. simple_lu on sizes 100..1100 (reported as "BlockSize").
 *   IV.  lu_distributed on a 4000x4000 matrix with blocks of 200.
 *   V.   lu_distributed_parallel on 4000x4000 and 20000x20000 matrices with
 *        blocks of 200 (the banner of this section also reads "IV").
 *
 * The flop count handed to perf_mflops() is size^3 for TRSM and 2*size^3/3
 * for LU. The cast to unsigned long long is applied to the first operand so
 * that the whole product is evaluated in 64 bits; size^3 does not fit in an
 * int for the sizes used here.
 *
 * Returns EXIT_SUCCESS; exits with EXIT_FAILURE if a matrix cannot be
 * allocated.
 */
int main(){
  int size;
  int i;
  int block_size = 200;  /* block size used by both blocked LU variants */
  int values[10] = {100,200,300,500,600,700,800,900,1000,1100};
  double performance, performance1;
  double *A, *B;
  perf_t start,stop,start1,stop1;
  unsigned long long flop;

  printf("################################################################################\n");
  printf("                               Performance test                                 \n");
  printf("################################################################################\n");

  /* ---- I: triangular solve ------------------------------------------- */
  printf("I - TRSM\n");
  printf("________\n\n");
  printf("Initiating matrix ...");
  fflush(stdout);
  size = 4000;
  flop = (unsigned long long ) size*size*size;
  A = bench_alloc_square(size);
  B = bench_alloc_square(size);
  initiate_matrix(size,size,A);
  initiate_matrix(size,size,B);
  printf("[OK]\n");
  printf("Running cblas_trsm ...");
  fflush(stdout);
  /* NOTE(review): 121 is CblasUpper in the reference cblas.h, while the PRCD
   * call below is given LOWER -- confirm both solve the same system. */
  perf(&start);
  cblas_dtrsm(CblasColMajor, CblasLeft, 121, CblasNoTrans, CblasNonUnit,size,size,1.,A,size,B,size);
  perf(&stop);
  perf_diff(&start, &stop);
  performance = perf_mflops(&stop, flop);
  printf("[OK]\n");
  printf("Running PRCD trsm ...");
  fflush(stdout);
  /* NOTE(review): B is not re-initialised between the two runs, so trsm_l
   * presumably works on the output of cblas_dtrsm -- confirm this is intended. */
  perf(&start1);
  trsm_l(size,A,B,LOWER,size);
  perf(&stop1);
  perf_diff(&start1, &stop1);
  performance1= perf_mflops(&stop1, flop);
  printf("[OK]\n");
  printf("Time Cblas version : ");
  perf_printh(&stop);
  printf("Time PRCD version : ");
  perf_printh(&stop1);
  printf("Matrix size : %dx%d\n",size,size);
  printf("Cblas version : %lf Mflop/s\n",performance);
  printf("PRCD version :  %lf Mflop/s\n",performance1);
  free(A);
  free(B);

  /* ---- II: sequential LU --------------------------------------------- */
  printf("\n\nII - LU seq\n");
  printf("___________\n\n");
  printf("Initiating matrix ...");
  fflush(stdout);
  size = 4000;
  flop = (unsigned long long) 2*size*size*size/3;
  A = bench_alloc_square(size);
  initiate_matrix(size,size,A);
  printf("[OK]\n");
  printf("Running cblas_lu ...");
  fflush(stdout);
  /* The reference factorisation (LAPACKE_dgetrf) is disabled: the timed
   * region is empty and `performance` is computed but never printed. */
  perf(&start);
  perf(&stop);
  perf_diff(&start, &stop);
  performance = perf_mflops(&stop, flop);
  printf("[OK]\n");
  printf("Running PRCD simple_lu ...");
  fflush(stdout);
  perf(&start1);
  simple_lu(size,A,size);
  perf(&stop1);
  perf_diff(&start1, &stop1);
  performance1= perf_mflops(&stop1, flop);
  printf("[OK]\n");
  printf("Matrix size : %dx%d\n",size,size);
  printf("PRCD version :  %lf Mflop/s\n",performance1);
  free(A);

  /* ---- III: simple_lu over a range of sizes -------------------------- */
  printf("\n\nIII - Best block size on the architecture\n");
  printf("_________________________________________\n\n");
  printf("Initiating matrix ...");
  fflush(stdout);
  printf("[OK]\n");

  for ( i = 0; i < 10 ; ++i){
    size = values[i];
    flop = (unsigned long long ) 2*size*size*size/3;
    A = bench_alloc_square(size);
    initiate_matrix(size,size,A);

    fflush(stdout);
    perf(&start1);
    simple_lu(size,A,size);
    perf(&stop1);
    perf_diff(&start1, &stop1);
    performance1= perf_mflops(&stop1, flop);
    printf("BlockSize %d  %lf Mflop/s\n",size,performance1);
    free(A);
  }

  /* ---- IV: blocked sequential LU ------------------------------------- */
  printf("\n\nIV - Block version of LU seq\n");
  printf("____________________________\n");
  printf("Initiating matrix ...");
  fflush(stdout);
  size = 4000;
  flop = (unsigned long long) 2*size*size*size/3;
  A = bench_alloc_square(size);
  initiate_matrix(size,size,A);
  printf("[OK]\n");
  printf("Running cblas_lu ...");
  fflush(stdout);
  /* Reference factorisation disabled: empty timed region (see section II). */
  perf(&start);
  perf(&stop);
  perf_diff(&start, &stop);
  performance = perf_mflops(&stop, flop);
  printf("[OK]\n");
  printf("Running PRCD simple_lu block using block of size %d...",block_size);
  fflush(stdout);
  perf(&start1);
  lu_distributed(size,A,block_size);
  perf(&stop1);
  perf_diff(&start1, &stop1);
  performance1= perf_mflops(&stop1, flop);
  printf("[OK]\n");
  printf("Matrix size : %dx%d\n",size,size);
  printf("PRCD version :  %lf Mflop/s\n",performance1);
  free(A);

  /* ---- V: blocked parallel LU, 4000x4000 ----------------------------- */
  printf("\n\nIV - Block version of LU parallel\n");
  printf("_________________________________\n");
  printf("Initiating matrix ...");
  fflush(stdout);
  size = 4000;
  flop = (unsigned long long) 2*size*size*size/3;
  A = bench_alloc_square(size);
  initiate_matrix(size,size,A);
  printf("[OK]\n");
  printf("Running cblas_lu ...");
  fflush(stdout);
  /* Reference factorisation disabled: empty timed region (see section II). */
  perf(&start);
  perf(&stop);
  perf_diff(&start, &stop);
  performance = perf_mflops(&stop, flop);
  printf("[OK]\n");
  printf("Running PRCD parallel lu  block using block of size %d...",block_size);
  fflush(stdout);
  perf(&start1);
  lu_distributed_parallel(size,A,block_size);
  perf(&stop1);
  perf_diff(&start1, &stop1);
  performance1= perf_mflops(&stop1, flop);
  printf("[OK]\n");
  printf("Matrix size : %dx%d\n",size,size);
  printf("PRCD version :  %lf Mflop/s\n",performance1);
  free(A);

  /* ---- V (continued): blocked parallel LU, 20000x20000 --------------- */
  printf("Initiating matrix ...");
  fflush(stdout);
  size = 20000;
  flop = (unsigned long long) 2*size*size*size/3;
  A = bench_alloc_square(size);
  initiate_matrix(size,size,A);
  printf("[OK]\n");
  printf("Running cblas_lu ...");
  fflush(stdout);
  /* Reference factorisation disabled: empty timed region (see section II). */
  perf(&start);
  perf(&stop);
  perf_diff(&start, &stop);
  performance = perf_mflops(&stop, flop);
  printf("[OK]\n");
  printf("Running PRCD parallel lu  block using block of size %d...",block_size);
  fflush(stdout);
  perf(&start1);
  lu_distributed_parallel(size,A,block_size);
  perf(&stop1);
  perf_diff(&start1, &stop1);
  performance1= perf_mflops(&stop1, flop);
  printf("[OK]\n");
  printf("Matrix size : %dx%d\n",size,size);
  printf("PRCD version :  %lf Mflop/s\n",performance1);
  free(A);

  return EXIT_SUCCESS;
}
示例#5
0
文件: driver.c 项目: lysbleu/pvm-bss
/*
 * Benchmark driver for cblas_ddot and several dgemm implementations.
 *
 * Part 1 times cblas_ddot for vector lengths from 50 up to (but excluding)
 * 100000000, growing by 25% per step, and writes "<length> <rate>" lines to
 * results/ddot_perf.txt.
 *
 * Part 2 times five matrix products for square dimensions 100, 120, ..., 980
 * and writes "<m*m> <rate1> ... <rate5>" lines to results/dgemm_perf.txt.
 *
 * Both result files are truncated at the start of their part. Every
 * measurement is also printed on stdout. If a result file cannot be opened
 * a warning is printed and the benchmark continues with stdout only.
 *
 * Command-line arguments are not used. Returns EXIT_SUCCESS, or EXIT_FAILURE
 * if the initial result matrix cannot be allocated.
 */
int main(int argc, char* argv[])
{
	/* Scratch buffer rewritten before timed regions; presumably meant to
	 * evict benchmark data from the cache (L2_CACHE_SIZE is defined
	 * outside this block). */
	char dummy[L2_CACHE_SIZE];
	/* One timestamp pair is enough: each measurement is consumed by
	 * perf_mflops() before the next one starts. */
	perf_t start, stop;
	double mflops, mflops1, mflops2, mflops3, mflops4, mflops5;
	FILE *out;
	int size = 50;
	blas_t *matriceD, *matriceE;

	(void) argc;
	(void) argv;

	/* ---- ddot performance tests ---- */
	alloc_vecteur(&matriceD, size);
	alloc_vecteur(&matriceE, size);

	printf("Tests de performance de la fonction ddot\n");

	out = fopen("results/ddot_perf.txt", "w");
	if (out == NULL) {
		perror("results/ddot_perf.txt");
	}

	for (size = 50; size < 100000000; size += size / 4)
	{
		printf("M: %d ", size);
		if (size != 50)
		{
			/* The vectors for the first length were allocated above. */
			free(matriceD);
			free(matriceE);
			alloc_vecteur(&matriceD, size);
			alloc_vecteur(&matriceE, size);
		}
		memset(dummy, 0, sizeof(dummy));

		/* Only the elapsed time matters; the dot product is discarded. */
		perf(&start);
		(void) cblas_ddot(size, matriceD, 1, matriceE, 1);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops = perf_mflops(&stop, 2 * size);
		printf("Mflops/s: %le\n", mflops);

		if (out != NULL) {
			fprintf(out, "%d %lf\n", size, mflops);
			/* Keep partial results on disk if the run is interrupted. */
			fflush(out);
		}
	}

	if (out != NULL) {
		fclose(out);
	}

	/* ---- dgemm performance tests ---- */

	/* long rather than int: 3 * m^3 exceeds INT_MAX near the end of the
	 * sweep (assumes long is wider than 32 bits -- TODO confirm target). */
	long m = 100;

	blas_t *matriceA, *matriceB, *matriceC;

	alloc_matrice(&matriceA, m, m);
	alloc_matrice(&matriceB, m, m);
	/* NOTE(review): the first C matrix is zero-filled by calloc while later
	 * ones come from alloc_matrice -- confirm the difference is intended. */
	matriceC = calloc(m * m, sizeof *matriceC);
	if (matriceC == NULL) {
		fprintf(stderr, "cannot allocate the %ldx%ld result matrix\n", m, m);
		free(matriceA);
		free(matriceB);
		free(matriceD);
		free(matriceE);
		return EXIT_FAILURE;
	}

	out = fopen("results/dgemm_perf.txt", "w");
	if (out == NULL) {
		perror("results/dgemm_perf.txt");
	}

	for (; m < 1000; m += 20)
	{
		printf("M: %ld ", m);

		if (m != 100)
		{
			/* The matrices for m == 100 were allocated above. */
			free(matriceA);
			free(matriceB);
			free(matriceC);
			alloc_matrice(&matriceA, m, m);
			alloc_matrice(&matriceB, m, m);
			alloc_matrice(&matriceC, m, m);
		}

		memset(dummy, 0, sizeof(dummy));

		/* NOTE(review): the flop counts below use 3 * m^3 (plus m^2 for the
		 * first variant); the usual count for a matrix product is
		 * 2 * m^3 -- confirm which convention is wanted. */
		perf(&start);
		cblas_dgemm_scalaire(CblasNoTrans, CblasNoTrans, m, m, m, 1, matriceA, m, matriceB, m, 1, matriceC, m);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops1 = perf_mflops(&stop, m * m * m * 3 + m * m);

		perf(&start);
		cblas_dgemm_scalaire1(matriceC, m, matriceA, m, matriceB, m, m);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops2 = perf_mflops(&stop, m * m * m * 3);

		perf(&start);
		cblas_dgemm_scalaire2(matriceC, m, matriceA, m, matriceB, m, m);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops3 = perf_mflops(&stop, m * m * m * 3);

		perf(&start);
		cblas_dgemm_scalaire3(matriceC, m, matriceA, m, matriceB, m, m);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops4 = perf_mflops(&stop, m * m * m * 3);

		perf(&start);
		cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, m, m, m, 1, matriceA, m, matriceB, m, 1, matriceC, m);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops5 = perf_mflops(&stop, m * m * m * 3);

		/* m * m has type long, so it must be printed with %ld. */
		if (out != NULL) {
			fprintf(out, "%ld %lf %lf %lf %lf %lf\n", m * m, mflops1, mflops2, mflops3, mflops4, mflops5);
			fflush(out);
		}
		printf("Mflops/s : %ld %lf %lf %lf %lf %lf\n", m * m, mflops1, mflops2, mflops3, mflops4, mflops5);
	}

	if (out != NULL) {
		fclose(out);
	}

	free(matriceA);
	free(matriceB);
	free(matriceC);
	free(matriceD);
	free(matriceE);
	return EXIT_SUCCESS;
}