Beispiel #1
0
/*
 * Small demonstration of cblas_dgemm_scalaire on 2x2 matrices.
 *
 * A is filled with 1..4 and B with 5..8 (in storage order), C is first
 * passed to matrix_init_ones. A and B are printed, the product routine is
 * called with CblasColMajor / CblasTrans / CblasNoTrans, alpha = 1 and
 * beta = 0, and C is printed afterwards. All three buffers are released
 * before returning.
 */
void test_question_four_one(){

  enum { DIM = 2, NB_ELEMS = DIM * DIM };

  double* A = matrix_alloc(DIM, DIM);
  double* B = matrix_alloc(DIM, DIM);
  double* C = matrix_alloc(DIM, DIM);
  matrix_init_ones(C, DIM, DIM);

  /* Consecutive values, written in storage order: A = 1..4, B = 5..8. */
  int idx;
  for (idx = 0; idx < NB_ELEMS; ++idx) {
    A[idx] = idx + 1;
    B[idx] = idx + 5;
  }

  printf("Matrix A:\n");
  affiche(DIM, DIM, A, DIM, stdout);

  printf("Matrix B:\n");
  affiche(DIM, DIM, B, DIM, stdout);

  /* First operand transposed, second one used as is. */
  cblas_dgemm_scalaire(CblasColMajor, CblasTrans, CblasNoTrans,
                       DIM, DIM, DIM,
                       1, A, DIM,
                       B, DIM,
                       0, C, DIM);

  printf("\nMatrix C:\n");
  affiche(DIM, DIM, C, DIM, stdout);

  /* Values noted by the original author for C:
   *   17 39
   *   23 53
   */

  free(A);
  free(B);
  free(C);
}
Beispiel #2
0
/*
 * Distributed block matrix product on a 2D Cartesian process grid.
 *
 * N          global dimension; the grid size used here is N/Nb
 * Nb         block dimension; every block buffer holds Nb*Nb doubles
 * myrank     rank of the caller in comm_grid
 * bl_a       local A block; also used as the broadcast buffer, so on return
 *            it holds the block of the last broadcast, not the original one
 * bl_b       local B block; replaced in place at every step
 * bl_c       local C block, handed to cblas_dgemm_scalaire at every step
 * comm_grid  Cartesian communicator used to look up the caller's coordinates
 * comm_col   communicator used for the B exchange
 * comm_row   communicator used for the A broadcast
 *
 * NOTE(review): the private copy of the A block is a variable-length array
 * of Nb*Nb doubles on the stack; large Nb values may exhaust the stack.
 */
void prod_matrix(int N, int Nb, int myrank,
	double* bl_a, double* bl_b, double* bl_c,
	MPI_Comm comm_grid, MPI_Comm comm_col, MPI_Comm comm_row)
{
	const int block_len = Nb*Nb;
	double own_a[block_len];   /* untouched copy of the caller's A block */
	int coords[2];
	MPI_Status status;
	int step;
	int e;

	const int grid_dim = N/Nb;

	/* Save the local A block: bl_a is overwritten by the broadcasts below. */
	for (e = 0; e < block_len; ++e)
	{
		own_a[e] = bl_a[e];
	}

	MPI_Cart_coords(comm_grid, myrank, 2, coords);

	/* Neighbours for the B exchange, derived from the first coordinate
	 * with wrap-around, and used as ranks inside comm_col. */
	const int dest = (((coords[0]-1)%grid_dim) +grid_dim) %grid_dim;
	const int source = (coords[0]+1)%grid_dim;
	const int row = coords[0];
	const int col = coords[1];

	for (step = 0; step < grid_dim; step++)
	{
		/* At this step the broadcast root, as a rank in comm_row, is
		 * (row + step) % grid_dim. */
		const int root = (row+step)%grid_dim;

		if (col == root)
		{
			/* The root sends its own original block, so put it back
			 * into the broadcast buffer first. */
			for (e = 0; e < block_len; ++e)
			{
				bl_a[e] = own_a[e];
			}
		}
		MPI_Bcast(bl_a, block_len, MPI_DOUBLE, root, comm_row);

		/* Per the original author's note: Cij = A[i][i+k%N]*B[i+k%N][j] */
		cblas_dgemm_scalaire(Nb, bl_a, bl_b, bl_c);

		/* Pass the current B block to `dest` and take the next one
		 * from `source`, in place. */
		MPI_Sendrecv_replace(bl_b, block_len, MPI_DOUBLE, dest, 0,
			source, 0, comm_col, &status);
	}
}
Beispiel #3
0
/*
 * Benchmark driver.
 *
 * Part 1 times cblas_ddot on vectors of growing length (50, then +25% per
 * step, below 100000000). Part 2 times five dgemm variants on square
 * matrices of order 100 to 980 in steps of 20. Every measurement is printed
 * on stdout and written as one line to results/ddot_perf.txt or
 * results/dgemm_perf.txt; both files are recreated on each run.
 *
 * Changes compared with the previous version:
 *  - values of type long are printed with %ld (they were printed with %d,
 *    which is undefined behaviour);
 *  - the timers are two automatic perf_t objects instead of ten heap
 *    objects that were never checked nor freed;
 *  - the result files are written with stdio instead of building shell
 *    commands ("rm", "echo ... >>") and running them through system();
 *  - the calloc result is checked.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the first C matrix cannot be
 * allocated. A result file that cannot be opened is reported once and the
 * benchmark still runs, printing to stdout only.
 */
int main(int argc, char* argv[])
{
	(void)argc;
	(void)argv;

	/* Buffer rewritten before timed runs; sized by L2_CACHE_SIZE,
	 * presumably to push the operands out of the cache. */
	char dummy[L2_CACHE_SIZE];

	/* One start/stop pair is enough: each measurement is consumed by
	 * perf_mflops right after perf_diff, before the pair is reused. */
	perf_t start, stop;

	double mflops, mflops1, mflops2, mflops3, mflops4, mflops5;
	FILE *out;

	/* ---- ddot performance tests ---- */
	int size = 50;

	blas_t *matriceD, *matriceE;
	alloc_vecteur(&matriceD, size);
	alloc_vecteur(&matriceE, size);

	printf("Tests de performance de la fonction ddot\n");

	/* "w" recreates the file, like the former "rm" followed by appends. */
	out = fopen("results/ddot_perf.txt", "w");
	if (out == NULL)
	{
		perror("results/ddot_perf.txt");
	}

	for (size = 50; size < 100000000; size += size / 4)
	{
		printf("M: %d ", size);
		if (size != 50)
		{
			free(matriceD);
			free(matriceE);
			alloc_vecteur(&matriceD, size);
			alloc_vecteur(&matriceE, size);
		}
		memset(dummy, 0, sizeof(dummy));
		perf(&start);
		blas_t res = cblas_ddot(size, matriceD, 1, matriceE, 1);
		perf(&stop);
		(void)res;	/* only the elapsed time matters here */
		perf_diff(&start, &stop);
		mflops = perf_mflops(&stop, 2 * size);
		printf("Mflops/s: %le\n", mflops);

		if (out != NULL)
		{
			fprintf(out, "%d %lf\n", size, mflops);
			/* Keep partial results on disk if the run is interrupted. */
			fflush(out);
		}
	}

	if (out != NULL)
	{
		fclose(out);
	}

	/* ---- dgemm performance tests ---- */
	long m = 100;

	blas_t *matriceA, *matriceB, *matriceC;

	alloc_matrice(&matriceA, m, m);
	alloc_matrice(&matriceB, m, m);
	matriceC = calloc(m * m, sizeof(blas_t));
	if (matriceC == NULL)
	{
		fprintf(stderr, "calloc failed for the %ld x %ld result matrix\n", m, m);
		free(matriceA);
		free(matriceB);
		free(matriceD);
		free(matriceE);
		return EXIT_FAILURE;
	}

	out = fopen("results/dgemm_perf.txt", "w");
	if (out == NULL)
	{
		perror("results/dgemm_perf.txt");
	}

	for (; m < 1000; m += 20)
	{
		printf("M: %ld ", m);

		if (m != 100)
		{
			free(matriceA);
			free(matriceB);
			free(matriceC);
			alloc_matrice(&matriceA, m, m);
			alloc_matrice(&matriceB, m, m);
			alloc_matrice(&matriceC, m, m);
		}

		memset(dummy, 0, sizeof(dummy));

		/* Variant 0: cblas_dgemm_scalaire */
		perf(&start);
		cblas_dgemm_scalaire(CblasNoTrans, CblasNoTrans, m, m, m, 1, matriceA, m, matriceB, m, 1, matriceC, m);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops1 = perf_mflops(&stop, m * m * m * 3 + m * m);

		/* Variant 1 */
		perf(&start);
		cblas_dgemm_scalaire1(matriceC, m, matriceA, m, matriceB, m, m);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops2 = perf_mflops(&stop, m * m * m * 3);

		/* Variant 2 */
		perf(&start);
		cblas_dgemm_scalaire2(matriceC, m, matriceA, m, matriceB, m, m);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops3 = perf_mflops(&stop, m * m * m * 3);

		/* Variant 3 */
		perf(&start);
		cblas_dgemm_scalaire3(matriceC, m, matriceA, m, matriceB, m, m);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops4 = perf_mflops(&stop, m * m * m * 3);

		/* cblas_dgemm.
		 * NOTE(review): this call passes CblasTrans for A while the first
		 * variant passes CblasNoTrans - confirm this is intended. */
		perf(&start);
		cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, m, m, m, 1, matriceA, m, matriceB, m, 1, matriceC, m);
		perf(&stop);
		perf_diff(&start, &stop);
		mflops5 = perf_mflops(&stop, m * m * m * 3);

		if (out != NULL)
		{
			fprintf(out, "%ld %lf %lf %lf %lf %lf\n", m * m, mflops1, mflops2, mflops3, mflops4, mflops5);
			fflush(out);
		}
		printf("Mflops/s : %ld %lf %lf %lf %lf %lf\n", m * m, mflops1, mflops2, mflops3, mflops4, mflops5);
	}

	if (out != NULL)
	{
		fclose(out);
	}

	free(matriceA);
	free(matriceB);
	free(matriceC);
	free(matriceD);
	free(matriceE);
	return EXIT_SUCCESS;
}